# 03 — Modelagem e comparação de modelos
Objetivos:

- Treinar e comparar múltiplos modelos (SVM, KNN, MLP)
- Utilizar validação cruzada estratificada
- Ajustar hiperparâmetros via GridSearch
- Comparar métricas (accuracy, precision, recall, F1)
- Selecionar modelo candidato para Deploy

In [None]:
import numpy as np
import pandas as pd
import sys
from pathlib import Path
from imblearn.over_sampling import SMOTE
sys.path.append(str(Path("..").resolve()))
from src import train 

from src.config import MODELS_CONFIG

from src.visual import plot_confusion_matrix, plot_roc_curve

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

# Single seed reused by every stochastic component in this notebook.
RANDOM_STATE = 42

# Seed NumPy's global generator first, then build the oversampler that is
# later handed to each model pipeline.
np.random.seed(RANDOM_STATE)
smote = SMOTE(random_state=RANDOM_STATE)

## Dataset de Treino e Pré-processamento

In [3]:
# Label column and location of the processed training split.
TARGET_COL = "Depression"
data_path = "../data/processed/train_dataset.csv"

# presumably df is the full frame, X the features and y the target column
# -- confirm against src/train.py::load_data.
df, X, y = train.load_data(data_path, target_col=TARGET_COL)

# NOTE(review): this is not the last expression of the cell, so Jupyter does
# not render the preview; move it to its own cell if the table is wanted.
df.head()

print("Shape de X:", X.shape)
print("Distribuição da variável alvo:")

# Relative class frequencies; as the cell's final expression it is displayed.
class_proportions = y.value_counts(normalize=True)
class_proportions

Shape de X: (22257, 11)
Distribuição da variável alvo:


Depression
1    0.585703
0    0.414297
Name: proportion, dtype: float64

## Pipeline dos Modelos 

In [9]:
# Models
#
# Every estimator with a stochastic component is seeded from RANDOM_STATE so
# that cross-validation scores do not depend on the order in which the
# notebook cells were executed.

# KNN: 15 neighbours, distance-weighted votes, Minkowski metric with p=2.
Model_knn = KNeighborsClassifier(
    n_neighbors=15,
    weights="distance",
    metric="minkowski",
    p=2,
)

# RBF-kernel SVM. With probability=True scikit-learn fits an internal
# cross-validated calibration that shuffles the data; without an explicit
# random_state it draws from the global NumPy RNG and results vary between
# runs.
Model_svm = SVC(kernel="rbf", probability=True, random_state=RANDOM_STATE)

# MLP with three hidden layers and early stopping (patience of 20 iterations).
# Uses the shared RANDOM_STATE instead of a separately hard-coded seed.
Model_mlp = MLPClassifier(
    hidden_layer_sizes=(128, 64, 32),
    activation="relu",
    solver="adam",
    max_iter=800,
    early_stopping=True,
    n_iter_no_change=20,
    random_state=RANDOM_STATE,
)

# One pipeline per model, all built with scaling and SMOTE enabled.
# NOTE(review): the same `smote` instance is passed to all three pipelines;
# confirm that train.build_pipeline (src/train.py) does not need a fresh
# sampler per pipeline.
pipeline_knn = train.build_pipeline(
    Model_knn, use_scaler=True, use_smote=True, smote=smote
)
pipeline_svm = train.build_pipeline(
    Model_svm, use_scaler=True, use_smote=True, smote=smote
)
pipeline_mlp = train.build_pipeline(
    Model_mlp, use_scaler=True, use_smote=True, smote=smote
)


## Treinamento KNN

In [12]:
# Collects the summary row(s) produced for the KNN pipeline.
results_knn = []

# 10-split cross-validation of the KNN pipeline with a 0.5 decision
# threshold; verbose=True prints the per-fold scores.
metrics = train.cross_validate(
    pipeline_knn,
    X=X,
    y=y,
    threshold=0.5,
    n_splits=10,
    random_state=RANDOM_STATE,
    verbose=True,
)

train.summarize_cv_results(metrics)

# Build the summary row: identification fields first, then the mean of each
# per-fold metric, keeping the key order of the comparison table.
knn_summary = {"model_name": 1, "model": "KNN - 15"}
for metric_key in ("accuracy", "precision", "recall", "f1"):
    knn_summary[f"{metric_key}_mean"] = np.mean(metrics[metric_key])

results_knn.append(knn_summary)


FOLD 1
Acurácia: 0.8095
Precisão: 0.8474
Recall:   0.8227
F1-score: 0.8349

FOLD 2
Acurácia: 0.8342
Precisão: 0.8725
Recall:   0.8397
F1-score: 0.8558

FOLD 3
Acurácia: 0.8351
Precisão: 0.8663
Recall:   0.8497
F1-score: 0.8579

FOLD 4
Acurácia: 0.8360
Precisão: 0.8741
Recall:   0.8413
F1-score: 0.8574

FOLD 5
Acurácia: 0.8293
Precisão: 0.8632
Recall:   0.8420
F1-score: 0.8525

FOLD 6
Acurácia: 0.8302
Precisão: 0.8651
Recall:   0.8413
F1-score: 0.8530

FOLD 7
Acurácia: 0.8329
Precisão: 0.8641
Recall:   0.8482
F1-score: 0.8560

FOLD 8
Acurácia: 0.8189
Precisão: 0.8594
Recall:   0.8258
F1-score: 0.8423

FOLD 9
Acurácia: 0.8279
Precisão: 0.8622
Recall:   0.8404
F1-score: 0.8511

FOLD 10
Acurácia: 0.8252
Precisão: 0.8703
Recall:   0.8243
F1-score: 0.8467

MÉDIAS E DESVIOS-PADRÃO
Accuracy  : 0.8279 | DP: 0.0079
Precision : 0.8645 | DP: 0.0072
Recall    : 0.8375 | DP: 0.0092
F1        : 0.8508 | DP: 0.0071


## Treinamento SVM

In [None]:
# Collects the summary row(s) produced for the SVM pipeline.
results_svm = []

# 10-split cross-validation of the SVM pipeline with a 0.5 decision
# threshold; verbose=True prints the per-fold scores.
metrics = train.cross_validate(
    pipeline_svm,
    X=X,
    y=y,
    threshold=0.5,
    n_splits=10,
    random_state=RANDOM_STATE,
    verbose=True,
)

train.summarize_cv_results(metrics)

# Fix: the row was previously appended to results_knn (copy-paste from the
# KNN cell), which left results_svm empty and polluted the KNN results.
# NOTE(review): "model_name" is 1 here and in the KNN cell; if it is meant to
# be a per-model identifier it should probably differ -- confirm with the
# cell that consumes these rows.
results_svm.append({
    "model_name": 1,
    "model": "SVM - RBF",
    "accuracy_mean": np.mean(metrics["accuracy"]),
    "precision_mean": np.mean(metrics["precision"]),
    "recall_mean": np.mean(metrics["recall"]),
    "f1_mean": np.mean(metrics["f1"]),
})


FOLD 1
Acurácia: 0.8230
Precisão: 0.8582
Recall:   0.8358
F1-score: 0.8468

FOLD 2


## Treinamento MLP