In [None]:
!pip install auto-sklearn

In [11]:
# Data handling and plotting.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# scikit-learn: splitting, preprocessing, metrics, estimators and tree plotting.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    classification_report,
    confusion_matrix,
)
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree

# auto-sklearn goes last so that a missing/broken installation does not stop
# the scikit-learn names above from being bound.
# Do NOT import auto-sklearn pipeline components under the names
# `DecisionTreeClassifier` / `LogisticRegression` here: that would rebind the
# scikit-learn estimators imported above and silently change every later
# constructor call, `isinstance` check and `plot_tree` call.
import autosklearn
from autosklearn.classification import AutoSklearnClassifier



ModuleNotFoundError: No module named 'autosklearn'

In [None]:
# Read the pre-split feature tables (the training file name indicates it was
# already resampled upstream).
X_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in ('X_train_resampled.csv', 'X_test.csv')
)

# Read the label files and keep only the target column as a Series.
y_train, y_test = (
    pd.read_csv(csv_path)['Diagnostico_Depressao']
    for csv_path in ('y_train_resampled.csv', 'y_test.csv')
)

In [None]:
# One-hot encode the input columns. handle_unknown='ignore' makes categories
# seen only in the test set encode as all-zero columns instead of raising.
# NOTE(review): the encoder is applied to every column, so numeric columns are
# treated as categorical too — confirm this is intended.
enc = OneHotEncoder(handle_unknown='ignore')
X_train_encoded = enc.fit_transform(X_train).toarray()
X_test_encoded = enc.transform(X_test).toarray()

# Baseline model: logistic regression with scikit-learn defaults.
baseline_model = LogisticRegression()
baseline_model.fit(X_train_encoded, y_train)

# Hard class predictions feed the threshold-based metrics; the positive-class
# probability feeds the AUC. Passing 0/1 predictions to roc_auc_score collapses
# the ROC curve to a single operating point and under-reports the ranking
# quality of the model.
y_pred_baseline = baseline_model.predict(X_test_encoded)
y_score_baseline = baseline_model.predict_proba(X_test_encoded)[:, 1]

# Evaluate the baseline.
print("Avaliação do Baseline (One Hot Encoder):")
print(f"Acurácia: {accuracy_score(y_test, y_pred_baseline)}")
print(f"Precisão: {precision_score(y_test, y_pred_baseline)}")
print(f"Recall: {recall_score(y_test, y_pred_baseline)}")
print(f"F1-Score: {f1_score(y_test, y_pred_baseline)}")
print(f"AUC: {roc_auc_score(y_test, y_score_baseline)}")
print(classification_report(y_test, y_pred_baseline))

# Confusion matrix (rows: true labels, columns: predicted labels).
cm = confusion_matrix(y_test, y_pred_baseline)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Matriz de Confusão (Baseline)')
plt.xlabel('Previsões')
plt.ylabel('Valores Reais')
plt.show()

In [2]:
# ---------------------------------------------------------------------------
# Fit the candidate models on the one-hot encoded training data.
# ---------------------------------------------------------------------------

# Naive Bayes
nb_model = GaussianNB()
nb_model.fit(X_train_encoded, y_train)
y_pred_nb = nb_model.predict(X_test_encoded)

# Random forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_encoded, y_train)
y_pred_rf = rf_model.predict(X_test_encoded)

# Logistic regression (same configuration and data as `baseline_model`, so its
# row in the comparison is expected to match the baseline).
lr_model = LogisticRegression()
lr_model.fit(X_train_encoded, y_train)
y_pred_lr = lr_model.predict(X_test_encoded)

# Decision tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train_encoded, y_train)
y_pred_dt = dt_model.predict(X_test_encoded)

# Auto-Sklearn search restricted to a small family of models.
# auto-sklearn selects pipeline components by their registered *string* names
# through the `include` dict (auto-sklearn >= 0.14); passing estimator classes
# is not supported.
# NOTE(review): auto-sklearn ships no plain logistic-regression classifier and
# no SelectKBest preprocessor; 'sgd' (linear model, log loss is in its search
# space) and 'select_percentile_classification' are the closest registered
# components — TODO confirm against the installed version.
automl = AutoSklearnClassifier(
    include={
        'classifier': ['decision_tree', 'sgd'],
        'feature_preprocessor': ['select_percentile_classification'],
    },
    time_left_for_this_task=120,
    per_run_time_limit=30,
    n_jobs=-1,
)
automl.fit(X_train_encoded, y_train)
y_pred_automl = automl.predict(X_test_encoded)

# ---------------------------------------------------------------------------
# Evaluate every model.
# ---------------------------------------------------------------------------
models = [
    ('Baseline', y_pred_baseline, baseline_model),
    ('Naive Bayes', y_pred_nb, nb_model),
    ('Random Forest', y_pred_rf, rf_model),
    ('Regressão Logística', y_pred_lr, lr_model),
    ('Árvore de Decisão', y_pred_dt, dt_model),
    ('Auto-Sklearn', y_pred_automl, automl),
]

metrics = ['Acurácia', 'Precisão', 'Recall', 'F1-Score', 'AUC']
results = {}

# Column names of the one-hot encoded matrix. The models were fitted on the
# encoded data, so the raw `X_train.columns` do not line up with the feature
# indices used inside the tree. `get_feature_names_out` exists from
# scikit-learn 1.0; older releases expose `get_feature_names` instead.
if hasattr(enc, 'get_feature_names_out'):
    encoded_feature_names = list(enc.get_feature_names_out(list(X_train.columns)))
else:
    encoded_feature_names = list(enc.get_feature_names(list(X_train.columns)))

for name, y_pred, model in models:
    # AUC must be computed from a continuous score; with hard 0/1 predictions
    # the ROC curve degenerates to a single operating point.
    y_score = model.predict_proba(X_test_encoded)[:, 1]

    results[name] = {
        'Acurácia': accuracy_score(y_test, y_pred),
        'Precisão': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1-Score': f1_score(y_test, y_pred),
        'AUC': roc_auc_score(y_test, y_score),
    }

    print(f"\nAvaliação do {name}:")
    print(classification_report(y_test, y_pred))

    # Confusion matrix, drawn on a fresh figure so it never lands on axes left
    # over from a previous plot.
    cm = confusion_matrix(y_test, y_pred)
    plt.figure()
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'Matriz de Confusão ({name})')
    plt.xlabel('Previsões')
    plt.ylabel('Valores Reais')
    plt.show()

    # Draw the fitted tree (decision-tree models only).
    if isinstance(model, DecisionTreeClassifier):
        plt.figure(figsize=(15, 10))
        plot_tree(
            model,
            filled=True,
            feature_names=encoded_feature_names,
            class_names=['Sem Depressão', 'Depressão'],
            rounded=True,
        )
        plt.title(f'Árvore de Decisão ({name})')
        plt.show()

    # Predicted probability against the first encoded feature (logistic
    # regression models only). The curve comes from the model currently being
    # evaluated, not from a fixed `lr_model`.
    if isinstance(model, LogisticRegression):
        plt.figure(figsize=(10, 6))
        sns.scatterplot(x=X_train_encoded[:, 0], y=y_train, label='Dados de Treinamento', color='blue')
        sns.lineplot(x=X_test_encoded[:, 0], y=y_score, label='Linha de Regressão', color='red')
        plt.title(f'Linha de Regressão ({name})')
        plt.xlabel('Feature 1 (Exemplo)')
        plt.ylabel('Probabilidade de Depressão')
        plt.show()

# ---------------------------------------------------------------------------
# Side-by-side comparison: one bar chart per metric, one bar per model.
# ---------------------------------------------------------------------------
fig, axs = plt.subplots(len(metrics), 1, figsize=(10, 6 * len(metrics)))
for i, metric in enumerate(metrics):
    axs[i].bar(list(results.keys()), [results[model_name][metric] for model_name in results])
    axs[i].set_ylabel(metric)
plt.tight_layout()
plt.show()

NameError: name 'GaussianNB' is not defined