## Tuning dos hiperparâmetros
- Será realizado o tuning para os seguintes casos:
    1) sem informações do WHOQOL na base
    2) com todas as respostas do WHOQOL
    3) só com os índices dos domínios do WHOQOL
    4) só com o índice final do WHOQOL

- Para os seguintes modelos:
    1) Random Forest
    2) SVM
    3) MLP
    4) XGBoost

#### Workflow:
Para cada caso:
- Importar dados
- Separar em treino e teste
- Definir os parâmetros a serem testados
- Realizar o tuning
- Salvar os resultados graficamente
- Salvar o melhor modelo

### Importando as bibliotecas

In [4]:
import os
import warnings

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import LabelEncoder
from tpot import TPOTClassifier

warnings.filterwarnings('ignore')



### Definindo a função

In [5]:
def _select_param_grid(model_name, position, param_grids):
    """Return the hyper-parameter grid for one model, or ``None`` if absent.

    The entry whose ``'model'`` key equals ``model_name`` wins; when no entry
    carries that name, the entry at the same ``position`` is used so that
    purely positional grid lists keep working.
    """
    for entry in param_grids:
        if entry.get('model') == model_name:
            return entry['params']
    if position < len(param_grids):
        return param_grids[position]['params']
    return None


def perform_model_tuning(data, target, models, param_grids, base_name, random_state=42):
    """Grid-search every model on an under-sampled dataset and save a comparison.

    The data is balanced with random under-sampling, split 80/20 into train
    and test sets, and each model is tuned with a 10-fold ``GridSearchCV``
    scored by accuracy. The best estimator of each search is evaluated on the
    held-out test set. A bar chart and a CSV table of the test accuracies are
    written to the current working directory.

    Args:
        data: Feature matrix accepted by ``RandomUnderSampler.fit_resample``.
        target: Class labels aligned with ``data``.
        models: Mapping of display name -> unfitted estimator.
        param_grids: Sequence of dicts with a ``'params'`` grid and,
            optionally, a ``'model'`` name used to match the grid to a model.
        base_name: Prefix of the generated graph and table file names.
        random_state: Seed shared by the under-sampler and the train/test
            split so that repeated runs see the same data.

    Returns:
        Dict mapping each tuned model name to its best estimator. Empty when
        there is no model with a parameter grid to tune.
    """
    # Pair each model with its grid before any expensive work, so a missing
    # grid is reported immediately instead of after hours of tuning.
    tuning_plan = []
    for position, (model_name, model) in enumerate(models.items()):
        param_grid = _select_param_grid(model_name, position, param_grids)
        if param_grid is None:
            print(f"Nenhum grid de parâmetros encontrado para o modelo {model_name}; modelo ignorado.")
            continue
        tuning_plan.append((model_name, model, param_grid))

    if not tuning_plan:
        # Nothing to tune: no resampling, no files written.
        return {}

    # Balance the classes by random under-sampling.
    sampler = RandomUnderSampler(random_state=random_state)
    data_resampled, target_resampled = sampler.fit_resample(data, target)

    # Hold out 20% of the balanced data for the final accuracy estimate.
    X_train, X_test, y_train, y_test = train_test_split(
        data_resampled, target_resampled, test_size=0.2, random_state=random_state
    )

    best_models = {}       # best estimator per model name
    model_accuracies = {}  # test-set accuracy per model name
    current_dir = os.getcwd()

    for model_name, model, param_grid in tuning_plan:
        print(f"Tuning do modelo {model_name} iniciado...")

        # Exhaustive search over the grid with 10-fold cross-validation.
        grid_search = GridSearchCV(model, param_grid, cv=10, scoring='accuracy')
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_

        # Score the selected estimator on data the search never saw.
        accuracy = accuracy_score(y_test, best_model.predict(X_test))
        model_accuracies[model_name] = accuracy
        print(f"Acurácia do modelo {model_name}: {accuracy:.4f}")

        best_models[model_name] = best_model

    # The output file names historically end with the name of the last tuned
    # model; that suffix is kept so existing result files keep their names,
    # but it is now taken explicitly instead of from a leaked loop variable.
    last_model_name = tuning_plan[-1][0]

    # Comparative bar chart of the test accuracies.
    plt.figure(figsize=(10, 6))
    plt.bar(list(model_accuracies), list(model_accuracies.values()))
    plt.xlabel('Modelos')
    plt.ylabel('Acurácia')
    plt.title('Comparação de desempenho dos modelos')
    plt.xticks(rotation=45)
    plt.tight_layout()
    graph_filename = f"{base_name}_graph_{last_model_name}.png"
    plt.savefig(os.path.join(current_dir, graph_filename))
    plt.close()

    # Accuracy table. The index holds the model names, so it must be written;
    # dropping it would leave a column of anonymous numbers.
    table_filename = f"{base_name}_table_{last_model_name}.csv"
    accuracies_df = pd.DataFrame.from_dict(model_accuracies, orient='index', columns=['Acurácia'])
    accuracies_df.to_csv(os.path.join(current_dir, table_filename), index_label='Modelo')

    return best_models

### Importando os dados

In [6]:
# Case 1 - no WHOQOL information: features are every column except the label.
base1 = pd.read_csv('Datasets/dataframe_socioeconomico_Niveis.csv')
le = LabelEncoder()
data1 = base1.drop(columns='Nivel_MHI')
target1 = le.fit_transform(base1['Nivel_MHI'])

# Case 2 - every WHOQOL answer included.
base2 = pd.read_csv('Datasets/df_social_whoqol_tratado_1.csv')
data2 = base2.drop(columns='Nivel_MHI')
target2 = le.fit_transform(base2['Nivel_MHI'])

# Case 3 - WHOQOL domain indices only: the base1 features plus the
# CR, CS, CT and CU columns of base2.
# NOTE(review): the column-wise concat assumes base1 and base2 list the same
# respondents in the same row order - confirm.
data3 = pd.concat([data1, base2[['CR', 'CS', 'CT', 'CU']]], axis=1)
target3 = le.fit_transform(base2['Nivel_MHI'])

# Case 4 - WHOQOL quality-of-life value only: the base1 features plus the
# BR column of base2.
data4 = pd.concat([data1, base2['BR']], axis=1)
target4 = le.fit_transform(base2['Nivel_MHI'])




In [7]:
# Candidate estimators, keyed by the display name used in logs and charts.
models = {
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC(),
    'MLP': MLPClassifier(),
    'XGBoost': XGBClassifier(),
}

# Hyper-parameter grids to search, listed in the same order as `models`.
param_grids = [
    {
        'model': 'Random Forest',
        'params': {
            'n_estimators': [100, 200, 300],
            'max_depth': [None, 5, 10],
        },
    },
    {
        'model': 'SVM',
        'params': {
            'C': [0.1, 1, 10],
            'kernel': ['linear', 'rbf'],
        },
    },
    {
        'model': 'MLP',
        'params': {
            'hidden_layer_sizes': [(10,), (50,), (100,)],
            'alpha': [0.001, 0.01, 0.1],
        },
    },
    {
        'model': 'XGBoost',
        'params': {
            'n_estimators': [100, 200, 300],
            'max_depth': [3, 5, 7],
        },
    },
]



In [15]:
# Baseline: 10-fold cross-validation of a default Random Forest on the
# dataset without WHOQOL information. The features and labels are used as
# loaded; no under-sampling happens in this cell.
from sklearn.model_selection import cross_val_score

model = RandomForestClassifier()
c1 = cross_val_score(model, data1, target1, cv=10)
# Per-fold scores first, then their mean.
print(c1, c1.mean(), sep='\n')

[0.67045455 0.67045455 0.66666667 0.67816092 0.64367816 0.65517241
 0.64367816 0.63218391 0.66666667 0.66666667]
0.6593782654127482


In [16]:
# Same 10-fold baseline on the second dataset; per-fold scores, then the mean.
c2 = cross_val_score(model, data2, target2, cv=10)
print(c2, c2.mean(), sep='\n')

[0.76136364 0.71590909 0.82758621 0.8045977  0.79310345 0.7816092
 0.77011494 0.85057471 0.73563218 0.81609195]
0.7856583072100313


In [17]:
# Same 10-fold baseline on the third dataset; per-fold scores, then the mean.
c3 = cross_val_score(model, data3, target3, cv=10)
print(c3, c3.mean(), sep='\n')

[0.72727273 0.72727273 0.79310345 0.72413793 0.79310345 0.68965517
 0.72413793 0.82758621 0.74712644 0.73563218]
0.7489028213166143


In [18]:
# Same 10-fold baseline on the fourth dataset; per-fold scores, then the mean.
c4 = cross_val_score(model, data4, target4, cv=10)
print(c4, c4.mean(), sep='\n')

[0.67045455 0.65909091 0.66666667 0.68965517 0.65517241 0.71264368
 0.63218391 0.6091954  0.64367816 0.67816092]
0.6616901776384535


In [None]:
def _tune_case(case_number, features, labels):
    """Print the banner for one dataset and run the grid search on it."""
    print(f"========================== Data {case_number}  =================================")
    return perform_model_tuning(features, labels, models, param_grids, f"data{case_number}")


# Run the tuning for each of the four datasets, in order.
best_models1 = _tune_case(1, data1, target1)
best_models2 = _tune_case(2, data2, target2)
best_models3 = _tune_case(3, data3, target3)
best_models4 = _tune_case(4, data4, target4)

In [None]:

def _run_tpot_search(case_number, features, labels):
    """Run one TPOT search on an 80/20 split and export the best pipeline.

    Returns the fitted TPOT object, its test-set score and the split itself
    (train features, test features, train labels, test labels).
    """
    print(f"========================== Data {case_number}  =================================")
    split = train_test_split(features, labels, test_size=0.2)
    train_features, test_features, train_labels, test_labels = split

    search = TPOTClassifier(verbosity=2, config_dict='TPOT light')
    search.fit(train_features, train_labels)

    score = search.score(test_features, test_labels)
    print(f"Acurácia do melhor modelo encontrado: {score}")

    search.export(f'tpot_best_model{case_number}.py')
    return search, score, split


# One search per dataset. The split and the accuracy are rebound at module
# level after every run so the notebook ends with the same global names
# (holding the values of the last dataset) as the step-by-step version.
tpot, accuracy, (X_train, X_test, y_train, y_test) = _run_tpot_search(1, data1, target1)
tpot2, accuracy, (X_train, X_test, y_train, y_test) = _run_tpot_search(2, data2, target2)
tpot3, accuracy, (X_train, X_test, y_train, y_test) = _run_tpot_search(3, data3, target3)
tpot4, accuracy, (X_train, X_test, y_train, y_test) = _run_tpot_search(4, data4, target4)