In [29]:
!pip3 install --user pandas boto3 mysql-connector-python scikit-learn numpy tensorflow



In [30]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import matplotlib as mpl
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import time
import sys
sys.path.append('../..') # Adiciona o diretório superior ao caminho de importação para acessar funções utilitárias
from functions.db_inserts import * # Importa funções para inserção de dados no banco de dados
from functions.aws_functions import * # Importa funções relacionadas à AWS
from functions.select_cols_functions import * # Importa funções para seleção e processamento de colunas
from tensorflow.keras.regularizers import l2
from sklearn.metrics import mean_absolute_error, cohen_kappa_score
from keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import backend as K

In [31]:
env_mode = 'prd' # Define o modo de ambiente: 'dev' para desenvolvimento, 'prd' para produção

### Lendo arquivo fonte

In [32]:
bucket_name = "adoptai-trusted-prod-tcc" # Nome do bucket S3 de onde os dados serão carregados

In [33]:
if env_mode == 'dev':
    # Modo de desenvolvimento: carrega e processa os dados localmente
    source = 'feature_engineering'
    df_trusted = pd.read_csv(f'../../../data/{source}.csv')
    df_trusted = apply_all_rules(df_trusted)
    s3 = [f'{source}.csv', df_trusted]
elif env_mode == 'prd':
    # Modo de produção: carrega e processa os dados a partir do S3
    s3 = s3_csv_to_df(bucket_name)
    df_trusted = s3[1]

AttributeError: module 'util.credentials' has no attribute 'aws_access_key_id'

### Utilizando LabelEncoded nas colunas categóricas

In [None]:
df_trusted = adjust_categoric_values(df_trusted)

### Parâmetros de gravação

In [None]:
nome_fonte = s3[0] # Armazena o nome do arquivo de dados utilizado
nome_modelo = 'MLP' # Define o nome do modelo
parameters = {
    'learning_rate': [
        1e-2,
        1e-3,
        1e-4
    ],
}

### Preparativos para executar o modelo

In [None]:
# Listas para armazenar métricas de desempenho e tempos de execução
accuracy = []
precision = []
recall = []
f1 = []
mae = []
qwk = []

start_times = []
end_times = []

In [None]:
# Separar as features (x) e os rótulos (y)
x = df_trusted.iloc[:, :-1] # Seleciona todas as colunas, exceto a última, como features
y = df_trusted.iloc[:, -1] # Seleciona a última coluna como rótulo

In [None]:
# Dividir os dados em conjuntos de treinamento e teste
x_train_full, x_test, y_train_full, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
x_valid, x_train = x_train_full[:int(len(x_train_full) * 0.1)], x_train_full[int(len(x_train_full) * 0.1):]
y_valid, y_train = y_train_full[:int(len(y_train_full) * 0.1)], y_train_full[int(len(y_train_full) * 0.1):]

In [None]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_valid = scaler.transform(x_valid)
x_test = scaler.transform(x_test)

In [None]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)




###  Rodando modelo Sequencial

In [None]:
def mae_classification(y_true, y_pred):
    y_pred_labels = K.argmax(y_pred, axis=-1)
    y_true_labels = K.cast(y_true, y_pred_labels.dtype)
    return K.mean(K.abs(y_true_labels - y_pred_labels))

In [None]:
for learning_rate in parameters['learning_rate']:

    start_time = start_times.append(time.time()) # Registra o tempo de início

    early_stopping = EarlyStopping(patience=10, restore_best_weights=True)

    # Inicializa o modelo com os parâmetros atuais
    model = keras.models.Sequential([
        Dense(32, activation="relu"),
        Dropout(0.3),
        Dense(16, activation="relu"),
        Dropout(0.3),
        Dense(5, activation="softmax")
    ])

    model.compile(loss="sparse_categorical_crossentropy",
                optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                metrics=["accuracy", mae_classification])

    history = model.fit(
        x_train, y_train,
        epochs=200,
        validation_data=(x_valid, y_valid),
        callbacks=[early_stopping]
    )

    # Fazer previsões no conjunto de teste
    y_pred = model.predict(x_test)
    y_pred_labels = np.argmax(y_pred, axis=1)

    # Calcula e armazena as métricas
    accuracy.append(accuracy_score(y_test, y_pred_labels))
    precision.append(precision_score(y_test, y_pred_labels, average=None))
    recall.append(recall_score(y_test, y_pred_labels, average=None))
    f1.append(f1_score(y_test, y_pred_labels, average=None))
    mae.append(mean_absolute_error(y_test, y_pred_labels))
    qwk.append(cohen_kappa_score(y_test, y_pred_labels, weights='quadratic'))

    end_times.append(time.time()) # Registra o tempo de término

Epoch 1/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.2751 - loss: 1.5495 - mae_classification: 0.9965 - val_accuracy: 0.3420 - val_loss: 1.4394 - val_mae_classification: 1.0000
Epoch 2/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 916us/step - accuracy: 0.3199 - loss: 1.4368 - mae_classification: 0.9721 - val_accuracy: 0.3536 - val_loss: 1.4265 - val_mae_classification: 1.0000
Epoch 3/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 824us/step - accuracy: 0.3413 - loss: 1.4267 - mae_classification: 0.9909 - val_accuracy: 0.3578 - val_loss: 1.4253 - val_mae_classification: 1.0000
Epoch 4/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 941us/step - accuracy: 0.3337 - loss: 1.4224 - mae_classification: 0.9899 - val_accuracy: 0.3553 - val_loss: 1.4172 - val_mae_classification: 0.9737
Epoch 5/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 793us/step - acc

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.2198 - loss: 1.7579 - mae_classification: 1.0000 - val_accuracy: 0.2861 - val_loss: 1.4871 - val_mae_classification: 1.0000
Epoch 2/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 827us/step - accuracy: 0.2856 - loss: 1.4953 - mae_classification: 1.0000 - val_accuracy: 0.2952 - val_loss: 1.4716 - val_mae_classification: 1.0000
Epoch 3/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 781us/step - accuracy: 0.2978 - loss: 1.4704 - mae_classification: 1.0000 - val_accuracy: 0.3445 - val_loss: 1.4577 - val_mae_classification: 1.0000
Epoch 4/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 772us/step - accuracy: 0.3114 - loss: 1.4494 - mae_classification: 1.0000 - val_accuracy: 0.3394 - val_loss: 1.4467 - val_mae_classification: 1.0000
Epoch 5/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 773us/step - accuracy: 0.326

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.2131 - loss: 2.0281 - mae_classification: 1.0000 - val_accuracy: 0.2485 - val_loss: 1.6570 - val_mae_classification: 1.0000
Epoch 2/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 752us/step - accuracy: 0.2376 - loss: 1.7408 - mae_classification: 0.9997 - val_accuracy: 0.2669 - val_loss: 1.5615 - val_mae_classification: 0.9737
Epoch 3/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 750us/step - accuracy: 0.2533 - loss: 1.6391 - mae_classification: 0.9931 - val_accuracy: 0.2802 - val_loss: 1.5241 - val_mae_classification: 0.9737
Epoch 4/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 784us/step - accuracy: 0.2670 - loss: 1.5823 - mae_classification: 0.9940 - val_accuracy: 0.2869 - val_loss: 1.5050 - val_mae_classification: 1.0000
Epoch 5/200
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 762us/step - accuracy: 0.262

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Obtendo métricas de desempenho

In [None]:
# Armazena as métricas calculadas
metrics = {
    'accuracy': accuracy,
    'precision': precision,
    'recall': recall,
    'f1_score': f1
}

### Gravando dados no banco

In [None]:
# Prepara os dados para inserção no banco de dados
model = {'nome_modelo': nome_modelo, 'nome_fonte': nome_fonte}
execution = {
    'accuracy': accuracy, 
    'mae': mae,
    'qwk': qwk,
    'start_time': start_times, 
    'end_time': end_times
    }
hyperparams = parameters
desempenho = {
    'precision_values': metrics['precision'], 
    'recall_values': metrics['recall'], 
    'f1_values': metrics['f1_score']
    }

In [None]:
# Insere os valores calculados nas tabelas do banco de dados
insert_values(model, execution, hyperparams, desempenho, env=env_mode)

Executando em dev
Registros inseridos em dev
