In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# Carregar dados
file_path = './dados_funcionais_patenteadores.xlsx'
df = pd.read_excel(file_path)

# Colunas alvo (múltiplas saídas)
target_columns = [
    "Interaction in patenting process. Active or passive?",
    "Classification regarding professional orientation",
    "Nature of motivation",
    "Relationship between standards / personal values"
]

X = df.drop(columns=target_columns)
y = df[target_columns]

# Identificar colunas numéricas e categóricas
numeric_features = X.select_dtypes(include=['float64', 'int64']).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

# Pré-processamento
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

# Modelo base
mlp = MLPClassifier(hidden_layer_sizes=(128, 64, 32), max_iter=2000, random_state=42, solver='adam', activation='relu')
multi_output_mlp = MultiOutputClassifier(mlp)

# Pipeline
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', multi_output_mlp)
])

# Dividir os dados
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Treinar o modelo
pipeline.fit(X_train, y_train)

# Prever
y_pred = pipeline.predict(X_test)
y_pred_df = pd.DataFrame(y_pred, columns=target_columns, index=y_test.index)

# Avaliação por coluna com classification_report
print("=== Acurácia e Métricas por Coluna ===")
accuracies = []
for col in target_columns:
    acc = accuracy_score(y_test[col], y_pred_df[col])
    accuracies.append(acc)
    print(f"\nAcurácia para '{col}': {acc:.2%}")
    print(classification_report(y_test[col], y_pred_df[col], zero_division=0))

# Acurácia média geral
mean_accuracy = np.mean(accuracies)
print(f"\n=== Acurácia média geral: {mean_accuracy:.2%} ===")

# Validação cruzada manual
kf = KFold(n_splits=3, shuffle=True, random_state=42)
cv_scores = []

for train_index, test_index in kf.split(X):
    X_cv_train, X_cv_test = X.iloc[train_index], X.iloc[test_index]
    y_cv_train, y_cv_test = y.iloc[train_index], y.iloc[test_index]
    
    pipeline.fit(X_cv_train, y_cv_train)
    y_cv_pred = pipeline.predict(X_cv_test)
    
    accs = []
    for i, col in enumerate(y.columns):
        acc = accuracy_score(y_cv_test[col], y_cv_pred[:, i])
        accs.append(acc)
    
    cv_scores.append(np.mean(accs))

print("\n=== Acurácia com Validação Cruzada ===")
for i, score in enumerate(cv_scores, 1):
    print(f"Fold {i}: {score:.2%}")
print(f"Acurácia média (cross-val): {np.mean(cv_scores):.2%}")


=== Acurácia e Métricas por Coluna ===

Acurácia para 'Interaction in patenting process. Active or passive?': 88.89%
              precision    recall  f1-score   support

      Active       0.78      0.88      0.82         8
     Passive       0.94      0.89      0.92        19

    accuracy                           0.89        27
   macro avg       0.86      0.88      0.87        27
weighted avg       0.90      0.89      0.89        27


Acurácia para 'Classification regarding professional orientation': 51.85%
                       precision    recall  f1-score   support

      Entrepreneurial       0.45      0.50      0.48        10
               Hybrid       0.56      0.69      0.62        13
Pragmatic traditional       0.00      0.00      0.00         4

             accuracy                           0.52        27
            macro avg       0.34      0.40      0.37        27
         weighted avg       0.44      0.52      0.48        27


Acurácia para 'Nature of motivation'

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Carregar os dados (substitua 'SuaÚltimaColuna' pelo nome real da última coluna)
file_path = './dados_funcionais_patenteadores.xlsx'
df = pd.read_excel(file_path)


# target_column = 'Interaction in patenting process. Active or passive?'  
target_column = 'Classification regarding professional orientation' 
# target_column = 'Nature of motivation' 
# target_column = 'Relationship between standards / personal values' 
X = df.drop(columns=[target_column])
y = df[target_column]

# Pré-processamento: normalização de variáveis numéricas e codificação de variáveis categóricas
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), X.select_dtypes(include=['float64', 'int64']).columns),  # Normaliza dados numéricos
        ('cat', OneHotEncoder(handle_unknown='ignore'), X.select_dtypes(include=['object']).columns)  # Codifica dados categóricos
    ])

# Divisão dos dados em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Criar o pipeline que aplica o pré-processamento e o modelo MLP
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('mlp', MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=1000, random_state=42))
])

# Treinar o modelo
pipeline.fit(X_train, y_train)

# Fazer previsões no conjunto de teste
y_pred = pipeline.predict(X_test)

# Avaliar a acurácia do modelo
accuracy = accuracy_score(y_test, y_pred)
print(f'Acurácia: {accuracy}')


Acurácia: 0.6296296296296297


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import cross_val_score

file_path = './dados_funcionais_patenteadores.xlsx'
df = pd.read_excel(file_path)

# target_column = 'Classification regarding professional orientation' 
target_column = 'Nature of motivation'
# target_column = 'Relationship between standards / personal values' 
X = df.drop(columns=[target_column])
y = df[target_column]

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), X.select_dtypes(include=['float64', 'int64']).columns),  # Normaliza dados numéricos
        ('cat', OneHotEncoder(handle_unknown='ignore'), X.select_dtypes(include=['object']).columns)  # Codifica dados categóricos
    ])

#dados em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('mlp', MLPClassifier(hidden_layer_sizes=(128, 64, 32), max_iter=2000, random_state=42, solver='adam', activation='relu'))
])

# treino
pipeline.fit(X_train, y_train)

# previsoes nos testes
y_pred = pipeline.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Acurácia: {accuracy}')


cv_scores = cross_val_score(pipeline, X, y, cv=3, scoring='accuracy')
print(f"Acurácia média com validação cruzada: {cv_scores.mean()}")

results_df = pd.DataFrame({
    'Real': y_test,
    'Previsto': y_pred
})

print("\nResultados das Previsões (Classes Reais vs Previsões):")
print(results_df.head(50)) 

print("\nNúmero de acertos por classe:")
print(results_df[results_df['Real'] == results_df['Previsto']].groupby('Real').size())

print("\nPrevisões Erradas (Real vs Previsto):")
print(results_df[results_df['Real'] != results_df['Previsto']])


Acurácia: 0.5185185185185185
Acurácia média com validação cruzada: 0.5343551797040168

Resultados das Previsões (Classes Reais vs Previsões):
          Real   Previsto
55   Intrinsic  Intrinsic
40       Mixed  Extrinsic
19   Extrinsic  Intrinsic
31       Mixed      Mixed
115  Intrinsic  Extrinsic
56   Extrinsic  Extrinsic
69       Mixed      Mixed
105  Extrinsic  Intrinsic
81   Intrinsic  Extrinsic
26   Extrinsic      Mixed
95   Intrinsic  Intrinsic
27   Intrinsic  Intrinsic
64       Mixed  Extrinsic
4    Intrinsic  Extrinsic
97   Intrinsic      Mixed
100      Mixed      Mixed
36   Intrinsic      Mixed
80   Extrinsic  Intrinsic
93       Mixed  Extrinsic
84   Extrinsic  Extrinsic
18   Extrinsic  Extrinsic
10       Mixed      Mixed
122  Extrinsic  Extrinsic
11   Extrinsic  Extrinsic
127      Mixed      Mixed
45   Intrinsic  Intrinsic
70   Intrinsic  Extrinsic

Número de acertos por classe:
Real
Extrinsic    5
Intrinsic    4
Mixed        5
dtype: int64

Previsões Erradas (Real vs Previsto

In [None]:
import tensorflow as tf
import numpy as np
import random

# Definir a semente para garantir reprodutibilidade
seed_value = 42

# Definir semente do Python
random.seed(seed_value)

# Definir semente do NumPy
np.random.seed(seed_value)

# Definir semente do TensorFlow
tf.random.set_seed(seed_value)

# Importações do código original
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Carregar os dados
df = pd.read_excel('./dados_funcionais_patenteadores.xlsx')

# Selecione a coluna alvo e as variáveis de entrada
X = df.drop(columns=['Nature of motivation'])
y = df['Nature of motivation']

# Selecionar apenas as colunas numéricas para normalização
X_numeric = X.select_dtypes(include=['float64', 'int64'])

# Normalização dos dados numéricos
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_numeric)

# Selecionar as colunas categóricas para codificação
X_categorical = X.select_dtypes(include=['object'])

# Codificação das variáveis categóricas com OneHotEncoder
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False) 
X_encoded = encoder.fit_transform(X_categorical)

# Concatenar as variáveis numéricas normalizadas com as variáveis categóricas codificadas
X_final = np.concatenate((X_scaled, X_encoded), axis=1)

# Convertendo as variáveis alvo (y_train e y_test) para inteiros
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Dividir os dados em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X_final, y_encoded, test_size=0.2, random_state=42)

# --- Construção do Autoencoder ---
input_dim = X_train.shape[1]  # Número total de características de entrada
encoding_dim = 64  # Dimensão da camada comprimida (representação compacta)

# Definir o Autoencoder
input_layer = Input(shape=(input_dim,))
encoded = Dense(128, activation='relu')(input_layer)
encoded = Dense(64, activation='relu')(encoded)
encoded = Dense(encoding_dim, activation='relu')(encoded)

decoded = Dense(64, activation='relu')(encoded)
decoded = Dense(128, activation='relu')(decoded)
decoded = Dense(input_dim, activation='sigmoid')(decoded)

# Modelo Autoencoder
autoencoder = Model(input_layer, decoded)

# Modelo Encoder (para obter a codificação comprimida)
encoder_model = Model(input_layer, encoded)

# Compilação do Autoencoder
autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')

# Treinar o Autoencoder
autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, validation_data=(X_test, X_test))

# --- Obter as Representações Comprimidas com o Encoder ---
X_train_encoded = encoder_model.predict(X_train)
X_test_encoded = encoder_model.predict(X_test)

# --- Construção do MLP ---
# O MLP agora vai usar as representações comprimidas como entrada
mlp_model = Sequential([
    Dense(128, activation='relu', input_dim=encoding_dim),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(len(np.unique(y_encoded)), activation='softmax')  # Para classificação multi-classe
])

# Compilar o modelo MLP
mlp_model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Treinar o MLP
mlp_model.fit(X_train_encoded, y_train, epochs=50, batch_size=32, validation_data=(X_test_encoded, y_test))

# Avaliar a acurácia do MLP
y_pred = mlp_model.predict(X_test_encoded)
y_pred_classes = y_pred.argmax(axis=-1)  # Converter as probabilidades para classes

# Calcular a acurácia
accuracy = accuracy_score(y_test, y_pred_classes)
print(f'Acurácia do modelo com Autoencoder + MLP: {accuracy}')


# --- Exibir os resultados das previsões ---
# Criando uma tabela com as classes reais e previstas
results_df = pd.DataFrame({
    'Real': label_encoder.inverse_transform(y_test),
    'Previsto': label_encoder.inverse_transform(y_pred_classes)
})

print("\nResultados das Previsões (Classes Reais vs Previsões):")
print(results_df.head(10))  # Exibe as 10 primeiras linhas para ver o resultado

# Exibir o número total de acertos para cada classe
print("\nNúmero de acertos por classe:")
print(results_df[results_df['Real'] == results_df['Previsto']].groupby('Real').size())

# Exibir as previsões erradas
print("\nPrevisões Erradas (Real vs Previsto):")
print(results_df[results_df['Real'] != results_df['Previsto']])


2025-05-05 14:55:48.589770: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-05 14:55:48.709346: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-05 14:55:48.773212: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746467748.837874 3310317 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746467748.857384 3310317 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746467748.987841 3310317 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

Epoch 1/50


2025-05-05 14:55:51.577208: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 101ms/step - loss: 0.2673 - val_loss: 0.2585
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.2630 - val_loss: 0.2505
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.2532 - val_loss: 0.2306
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.2294 - val_loss: 0.1865
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.1792 - val_loss: 0.1141
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.1063 - val_loss: 0.0583
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0613 - val_loss: 0.0472
Epoch 8/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0537 - val_loss: 0.0465
Epoch 9/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - accuracy: 0.3141 - loss: 1.2979 - val_accuracy: 0.2222 - val_loss: 1.1622
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.4063 - loss: 1.0965 - val_accuracy: 0.2963 - val_loss: 1.1552
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.4391 - loss: 1.0413 - val_accuracy: 0.3704 - val_loss: 1.0977
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.4635 - loss: 1.0238 - val_accuracy: 0.4815 - val_loss: 1.0718
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.5115 - loss: 1.0182 - val_accuracy: 0.5926 - val_loss: 1.0630
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5352 - loss: 0.9860 - val_accuracy: 0.5556 - val_loss: 1.0541
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# === Carregar os dados ===
file_path = './dados_funcionais_patenteadores.xlsx'
df = pd.read_excel(file_path)

# === Escolha da coluna alvo ===
target_column = 'Nature of motivation'

X = df.drop(columns=[target_column])
y = df[target_column]

# === Pré-processamento ===
numeric_features = X.select_dtypes(include=['float64', 'int64']).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

X_processed = preprocessor.fit_transform(X)

# === Codificação da variável alvo ===
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# === Redimensionar X para 3D (samples, features, 1) ===
X_reshaped = X_processed.toarray() if hasattr(X_processed, "toarray") else X_processed
X_reshaped = X_reshaped.reshape((X_reshaped.shape[0], X_reshaped.shape[1], 1))

# === Dividir os dados ===
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y_categorical, test_size=0.2, random_state=42)

# === Modelo CNN ===
model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Conv1D(32, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(y_categorical.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# === Treinar o modelo ===
model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.2, verbose=1)

# === Avaliação ===
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'\nAcurácia no conjunto de teste: {accuracy:.2%}')


2025-05-07 00:49:47.777358: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-07 00:49:47.959661: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-07 00:49:48.098267: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746589788.224061   17340 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746589788.258141   17340 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746589788.564959   17340 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-05-07 00:49:53.056410: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW
2025-05-07 00:49:53.056449: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:178] verbose logging is disabled. Rerun with verbose logging (usually --v=1 or --vmodule=cuda_diagnostics=1) to get more diagnostic output from this module
2025-05-07 00:49:53.056454: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:183] retrieving CUDA diagnostic information for host: caio
2025-05-07 00:49:53.056458: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:190] hostname: caio
2025-05-07 00:49:53.056597: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:197] libcuda reported version is: 550.144.3
2025-05-07 

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 0.3708 - loss: 1.1021 - val_accuracy: 0.2381 - val_loss: 1.1234
Epoch 2/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.4380 - loss: 1.0594 - val_accuracy: 0.2381 - val_loss: 1.1262
Epoch 3/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5695 - loss: 1.0139 - val_accuracy: 0.2857 - val_loss: 1.1553
Epoch 4/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5179 - loss: 0.9603 - val_accuracy: 0.1905 - val_loss: 1.2094
Epoch 5/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.6378 - loss: 0.8863 - val_accuracy: 0.2857 - val_loss: 1.2059
Epoch 6/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.6590 - loss: 0.8284 - val_accuracy: 0.2381 - val_loss: 1.3277
Epoch 7/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [12]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

# === Carregar dados ===
file_path = './dados_funcionais_patenteadores.xlsx'
df = pd.read_excel(file_path)

# === Targets ===
target_columns = [
    "Interaction in patenting process. Active or passive?",
    "Classification regarding professional orientation",
    "Nature of motivation",
    "Relationship between standards / personal values"
]

# === Saneamento dos nomes para Keras ===
def sanitize_name(name):
    return re.sub(r'\W+', '_', name)

output_names = {col: sanitize_name(col) for col in target_columns}

# === Separar X e y ===
X = df.drop(columns=target_columns)
y = df[target_columns]

# === Pré-processamento dos dados de entrada ===
numeric_features = X.select_dtypes(include=['float64', 'int64']).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

X_processed = preprocessor.fit_transform(X)
X_array = X_processed.toarray() if hasattr(X_processed, "toarray") else X_processed
X_reshaped = X_array.reshape((X_array.shape[0], X_array.shape[1], 1))

# === Pré-processamento das saídas ===
y_encoders = {}
y_outputs = {}

for col in target_columns:
    le = LabelEncoder()
    y_encoded = le.fit_transform(y[col])
    y_categorical = to_categorical(y_encoded)
    y_encoders[col] = le
    y_outputs[col] = y_categorical

# === Dividir treino/teste ===
indices = np.arange(X_reshaped.shape[0])
X_train, X_test, idx_train, idx_test = train_test_split(X_reshaped, indices, test_size=0.2, random_state=42)

y_train = {output_names[col]: y_outputs[col][idx_train] for col in target_columns}
y_test = {output_names[col]: y_outputs[col][idx_test] for col in target_columns}

# === Modelo CNN Multi-saída ===
input_layer = Input(shape=(X_reshaped.shape[1], 1))

x = Conv1D(64, kernel_size=3, activation='relu')(input_layer)
x = MaxPooling1D(pool_size=2)(x)
x = Dropout(0.3)(x)
x = Conv1D(32, kernel_size=3, activation='relu')(x)
x = MaxPooling1D(pool_size=2)(x)
x = Flatten()(x)
x = Dense(64, activation='relu')(x)

# Saídas para cada coluna alvo
outputs = {
    output_names[col]: Dense(y_outputs[col].shape[1], activation='softmax', name=output_names[col])(x)
    for col in target_columns
}

# Compilação do modelo
model = Model(inputs=input_layer, outputs=outputs)
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss={name: 'categorical_crossentropy' for name in output_names.values()},
    metrics={name: 'accuracy' for name in output_names.values()}
)

# === Treinar ===
model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.2, verbose=1)

# === Avaliar ===
results = model.evaluate(X_test, y_test, verbose=0)
metrics_names = model.metrics_names

print("\n=== Acurácias por coluna ===")
for col in target_columns:
    y_true = np.argmax(y_test[output_names[col]], axis=1)
    y_pred_prob = model.predict(X_test, verbose=0)[output_names[col]]
    y_pred = np.argmax(y_pred_prob, axis=1)
    acc = accuracy_score(y_true, y_pred)
    print(f"{col}: {acc:.2%}")


Epoch 1/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 94ms/step - Classification_regarding_professional_orientation_accuracy: 0.2940 - Classification_regarding_professional_orientation_loss: 1.3490 - Interaction_in_patenting_process_Active_or_passive__accuracy: 0.4995 - Interaction_in_patenting_process_Active_or_passive__loss: 0.6921 - Nature_of_motivation_accuracy: 0.3782 - Nature_of_motivation_loss: 1.1010 - Relationship_between_standards_personal_values_accuracy: 0.3162 - Relationship_between_standards_personal_values_loss: 1.1018 - loss: 4.2463 - val_Classification_regarding_professional_orientation_accuracy: 0.6667 - val_Classification_regarding_professional_orientation_loss: 1.1521 - val_Interaction_in_patenting_process_Active_or_passive__accuracy: 0.5714 - val_Interaction_in_patenting_process_Active_or_passive__loss: 0.6833 - val_Nature_of_motivation_accuracy: 0.3333 - val_Nature_of_motivation_loss: 1.1045 - val_Relationship_between_standards_personal_values_a