In [6]:
!pip install pandas

Collecting pandas
  Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m49.8 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
Collecting numpy>=1.23.2
  Downloading numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m63.5 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m
Collecting pytz>=2020.1
  Downloading pytz-2025.1-py2.py3-none-any.whl (507 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.9/507.9 kB[0m [31m87.1 MB/s[0m eta [36m0:00:00[0m
Collecting tzdata>=2022.7
  Downloading tzdata-2025.1-py2.py3-none-any.whl (346 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m346.8/346.8 kB[0m [31m72.4 MB/s[0m eta [36m0:00:00[0m

In [8]:
# Import required libraries
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Step 1: read the spreadsheet into a DataFrame.
# NOTE(review): this is an absolute path; adjust it when running the notebook
# in a different environment.
file_path = "/mnt/data/dados_funcionais_patenteadores.xlsx"
dados = pd.read_excel(io=file_path)

# Step 2: handle missing values
# Column that is later used as the prediction target.
TARGET_COLUMN = 'Have you ever had a patent awarded?'

# Categorical columns that are imputed and then label-encoded. Declared once
# so the imputation loop and the encoding loop cannot drift apart.
CATEGORICAL_COLUMNS = [
    'Research areas',
    'Research subareas',
    "Bachelor's degree location",
    "Master's degree location",
    "Doctorate's degree location",
    'Gender',
    TARGET_COLUMN,
    'Have you ever had any patents licensed?',
    'Have you ever had a patent deposited abroad through Patent Cooperation Treaty (PCT)?',
    'Have any patent request been the result of interaction with the industry?',
    'Interaction in patenting process. Active or passive?',
    'Classification regarding professional orientation',
    'Nature of motivation',
    'Relationship between standards / personal values',
]

# Rows with no recorded target are dropped instead of imputed: filling the
# label with the most frequent class would fabricate ground truth for both
# training and evaluation.
dados_imputados = dados.dropna(subset=[TARGET_COLUMN]).copy()

# Fill the remaining gaps in each categorical column with that column's mode.
imputer = SimpleImputer(strategy='most_frequent')
for col in CATEGORICAL_COLUMNS:
    # fit_transform returns an (n_rows, 1) array; flatten it to a 1-D column.
    dados_imputados[col] = imputer.fit_transform(dados_imputados[[col]]).ravel()

# Step 3: convert the categorical variables into integer codes
# One fitted encoder is kept per column so the codes can be mapped back to
# the original labels later (e.g. label_encoders[TARGET_COLUMN].classes_).
# After the loop, `label_encoder` still refers to the encoder fitted on the
# last column, as it did when a single encoder was refit for every column.
label_encoders = {}
for col in CATEGORICAL_COLUMNS:
    label_encoder = LabelEncoder()
    dados_imputados[col] = label_encoder.fit_transform(dados_imputados[col])
    label_encoders[col] = label_encoder

# Step 4: reduce "Birth Interval" (year intervals written as "start-end") to
# the numeric starting year.
birth_raw = dados_imputados['Birth Interval']

# Values that are already numeric (or plain numeric strings) are kept as they are.
birth_numeric = pd.to_numeric(birth_raw, errors='coerce')

# For everything else, take the first run of digits, i.e. the interval's lower
# bound. Unlike int(x.split('-')[0]), this does not raise on unexpected text
# and does not depend on which dash character separates the two years.
birth_leading = pd.to_numeric(
    birth_raw.astype(str).str.extract(r'(\d+)', expand=False),
    errors='coerce',
)
birth_start = birth_numeric.fillna(birth_leading)

# Entries with no usable year would otherwise reach the model as NaN or raw
# text and make fitting fail; fall back to the median starting year.
dados_imputados['Birth Interval'] = birth_start.fillna(birth_start.median())

# Step 5: separate the input features (X) from the target variable (y)
target_column = 'Have you ever had a patent awarded?'
X = dados_imputados.drop(columns=['Identifier', target_column])  # drop the identifier and the target
y = dados_imputados[target_column]  # target variable

# Step 6: split into training and test sets (80% train, 20% test)
# Stratify on the target so both subsets keep the same class proportions.
# Stratification needs at least two classes with two rows each; when that
# does not hold, fall back to a plain random split.
class_counts = y.value_counts()
can_stratify = len(class_counts) > 1 and class_counts.min() >= 2
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y if can_stratify else None,
)

# Step 7: build the neural-network model
# The features mix small integer label codes with year values, and MLP
# training is sensitive to feature scale, so the inputs are standardised
# first. Because the scaler is part of the pipeline, it is fitted on the
# training data only and re-applied automatically at prediction time.
# The fitted MLPClassifier itself is available as model[-1].
model = make_pipeline(
    StandardScaler(),
    MLPClassifier(hidden_layer_sizes=(64, 64), max_iter=500, random_state=42),
)

# Step 8: train the model
model.fit(X_train, y_train)

# Step 9: predict labels for the held-out test set
y_pred = model.predict(X_test)

# Step 10: score the predictions against the true test labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
classification_report_result = classification_report(y_true=y_test, y_pred=y_pred)

# Show the overall accuracy followed by the per-class report
print(
    f'Acurácia: {accuracy}',
    f'Relatório de Classificação:\n{classification_report_result}',
    sep='\n',
)


ModuleNotFoundError: No module named 'sklearn'