## Tunning dos hiperparâmetros
- Será realizado o tunning para os seguintes casos:
    1) sem informações do WHOQOL na base
    2) com todas as respostas do WHOQOL
    3) só com o índice final do WHOQOL
    4) só com os índices dos domínios do WHOQOL

- Para os seguintes modelos:
    1) Random Forest
    2) SVM
    3) MLP
    4) XGBoost

#### Workflow:
Para cada caso:
- Importar dados
- Separar em treino e teste ( Preciso
- Definir os parâmetros a serem testados
- Realizar o tunning
- Salvar os resultados graficamente
- Salvar o melhor modelo

### Importando as bibliotecas

In [77]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import LabelEncoder
from tpot import TPOTClassifier
import os
import pickle
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold

import warnings
warnings.filterwarnings('ignore')

### Definindo a função

In [2]:
def perform_model_tuning(data, target, models, param_grids, base_name):
    # Dividir os dados em treinamento e teste após o Random Under-Sampling
    X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)

    best_models = {}  # Dicionário para armazenar o melhor modelo de cada tipo
    model_accuracies = {}  # Dicionário para armazenar as acurácias de cada modelo
    current_dir = os.getcwd()
    for model_name, model, param_grid_entry in zip(models.keys(), models.values(), param_grids):
        print(f"Tuning do modelo {model_name} iniciado...")

        # Acessar o dicionário de parâmetros correspondente ao modelo atual
        param_grid = param_grid_entry['params']

        # Realizar a busca em grid para encontrar os melhores parâmetros
        grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
        grid_search.fit(X_train, y_train)

        # Melhor modelo encontrado
        best_model = grid_search.best_estimator_

        # Fazer previsões no conjunto de teste
        y_pred = best_model.predict(X_test)

        # Calcular a acurácia do modelo
        accuracy = accuracy_score(y_test, y_pred)
        model_accuracies[model_name] = accuracy

        print(f"Acurácia do modelo {model_name}: {accuracy:.4f}")

        # Salvar o melhor modelo
        best_models[model_name] = best_model

    # Plotar gráfico de desempenho comparativo
    plt.figure(figsize=(10, 6))
    plt.bar(model_accuracies.keys(), model_accuracies.values())
    plt.xlabel('Modelos')
    plt.ylabel('Acurácia')
    plt.title('Comparação de desempenho dos modelos')
    plt.xticks(rotation=45)
    plt.tight_layout()
    graph_filename = f"{base_name}_graph_{model_name}.png"
    graph_path = os.path.join(current_dir, graph_filename)
    plt.savefig(graph_path)
    plt.close()


    #Pickle the single best model, verifying the best and saving from best_models dict, se tiver mais algo salvo com o mesmo nome, adiciona mais um.
    best_model = max(model_accuracies, key=model_accuracies.get)
    best_model = best_models[best_model]
    if os.path.exists(f"{base_name}_best_model.pkl"):
        i = 1
        while os.path.exists(f"{base_name}_best_model_{i}.pkl"):
            i += 1
        pickle.dump(best_model, open(f"{base_name}_best_model_{i}.pkl", "wb"))
    else:
        pickle.dump(best_model, open(f"{base_name}_best_model.pkl", "wb"))
    # Salvar tabela com os valores de acurácia
    table_filename = f"{base_name}_table_{model_name}.csv"
    accuracies_df = pd.DataFrame.from_dict(model_accuracies, orient='index', columns=['Acurácia'])
    table_path = os.path.join(current_dir, table_filename)
    accuracies_df.to_csv(table_path, index=False)

    return best_models

### Importando os dados

In [17]:
# Sem WHOQOL:
base1 = pd.read_csv('Datasets/dataframe_socioeconomico_Niveis.csv')
data1 = base1.drop('Nivel_MHI', axis=1)
target1 = base1['Nivel_MHI']
le = LabelEncoder()
target1 = le.fit_transform(target1)

# Realizar Random Under-Sampling
sampler = RandomUnderSampler()
data1_re, target1_re = sampler.fit_resample(data1, target1)


# com todas as respostas do WHOQOL
base2 = pd.read_csv('Datasets/df_social_whoqol_tratado_1.csv')
data2 = base2.drop('Nivel_MHI', axis=1)
target2 = base2['Nivel_MHI']
target2 = le.fit_transform(target2)
sampler = RandomUnderSampler()
data2_re, target2_re = sampler.fit_resample(data2, target2)


# só com o indice dos dominiosdo WHOQOL, concatenar base1 com final da base2
data3 = pd.concat([data1, base2.loc[:,['CR','CS','CT','CU']]], axis=1)
target3 = base2['Nivel_MHI']
target3 = le.fit_transform(target3)
sampler = RandomUnderSampler()
data3_re, target3_re = sampler.fit_resample(data3, target3)


# Só com a qualidade de vida do WHOQOL
data4 = pd.concat([data1, base2['BR']], axis=1)
target4 = base2['Nivel_MHI']
target4 = le.fit_transform(target4)
sampler = RandomUnderSampler()
data4_re, target4_re = sampler.fit_resample(data4, target4)




In [25]:
# Dividir os dados em treino e teste
X_train1, X_test1, y_train1, y_test1 = train_test_split(data1_re, target1_re, test_size=0.2)
X_train2, X_test2, y_train2, y_test2 = train_test_split(data2_re, target2_re, test_size=0.2)
X_train3, X_test3, y_train3, y_test3 = train_test_split(data3_re, target3_re, test_size=0.2)
X_train4, X_test4, y_train4, y_test4 = train_test_split(data4_re, target4_re, test_size=0.2)


In [4]:
# Definindo parametros:
# Definir os modelos e os grids de parâmetros
models = {
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC(),
    'MLP': MLPClassifier(),
    'XGBoost': XGBClassifier()
}

param_grids = [
    {
        'model': 'Random Forest',
        'params': {
            'n_estimators': [50, 100, 200, 300, 400, 500],
            'max_depth': [None, 5, 10, 15, 20],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'max_features': ['auto', 'sqrt', 'log2']
        }
    },
    {
        'model': 'SVM',
        'params': {
            'C': [0.1, 1, 10, 100],
            'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
            'gamma': ['scale', 'auto']
        }
    },
    {
        'model': 'MLP',
        'params': {
            'hidden_layer_sizes': [(10,), (50,), (100,), (150,), (200,)],
            'activation': ['logistic', 'tanh', 'relu'],
            'alpha': [0.0001, 0.001, 0.01, 0.1, 1],
            'learning_rate': ['constant', 'invscaling', 'adaptive']
        }
    },
    {
        'model': 'XGBoost',
        'params': {
            'n_estimators': [50, 100, 200, 300, 400, 500],
            'max_depth': [3, 5, 7, 9, 11],
            'learning_rate': [0.01, 0.1, 0.2, 0.3],
            'subsample': [0.6, 0.8, 1.0],
            'colsample_bytree': [0.6, 0.8, 1.0]
        }
    }
]





In [71]:
# Criando uma função para avaliar os modelos
def avalia_modelo_cv(model, X_input, y_input, n, resultados_individuais=False):
    kf = KFold(n_splits=n)
    reports = []
    y_true = []
    y_pred_list = []
    X_array = X_input.values
    accuracy_list = []
    model_name = type(model).__name__
    for fold_idx, (train_index, test_index) in enumerate(kf.split(X_input)):
        X_train, X_test = X_array[train_index], X_array[test_index]
        y_train, y_test = y_input[train_index], y_input[test_index]
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_true.extend(y_test)
        y_pred_list.append(y_pred)
        report = classification_report(y_test, y_pred, zero_division=0)
        reports.append(report)
        accuracy_list.append(accuracy_score(y_test, y_pred))
        if resultados_individuais:
            print(f"Classification Report for fold {fold_idx + 1}:\n{report}\n")
    y_pred_total = np.concatenate(y_pred_list)
    print("-" * 45)
    print("model_name: ", model_name)
    print("Average Classification Report:")
    avg_report = classification_report(y_true, y_pred_total, zero_division=0)
    print(avg_report)
    #Retorna a acuracia media
    return accuracy_score(y_true, y_pred_total)

In [96]:
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import cross_val_predict
import numpy as np


def perform_cross_validation(model, X, y, cv, verbose=True):
    reports = []
    predictions = cross_val_predict(model, X, y, cv=cv)

    for fold in range(cv):
        start = fold * len(X) // cv
        end = (fold + 1) * len(X) // cv
        report = classification_report(y[start:end], predictions[start:end], output_dict=True)
        reports.append(report)

        if verbose:
            print(f"Fold {fold + 1} Classification Report:")
            print(classification_report(y[start:end], predictions[start:end]))
            print("-" * 50)

    avg_report = {
        'precision': np.mean([report['weighted avg']['precision'] for report in reports]),
        'recall': np.mean([report['weighted avg']['recall'] for report in reports]),
        'f1-score': np.mean([report['weighted avg']['f1-score'] for report in reports]),
        'support': np.mean([report['weighted avg']['support'] for report in reports])
    }

    accuracy = accuracy_score(y, predictions)

    if verbose or not verbose:
        print("Average Classification Report:")
        print(classification_report(y, predictions))
        print("-" * 50)
        print(f"Accuracy: {accuracy:.2f}")

    return None


In [97]:
# Chamando função para cada uma das bases
# Aplicando Random Forest no under-sampling com Cross Validation de 10 folds
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score
model = RandomForestClassifier()
#avalia_modelo_cv(model, data1_re, target1_re, 5, True)

perform_cross_validation(model, data1_re, target1_re, 5, True)



Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.39      0.56        89
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0

    accuracy                           0.39        89
   macro avg       0.33      0.13      0.19        89
weighted avg       1.00      0.39      0.56        89

--------------------------------------------------
Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.47      0.59        60
           1       0.62      0.52      0.57        29
           2       0.00      0.00      0.00         0

    accuracy                           0.48        89
   macro avg       0.48      0.33      0.39        89
weighted avg       0.74      0.48      0.58        89

--------------------------------------------------
Fold 3 Classification Report:
              precision    recall  f1-score   

In [98]:
perform_cross_validation(model, data2_re, target2_re, 5, True)


Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.76      0.87        89
           2       0.00      0.00      0.00         0

    accuracy                           0.76        89
   macro avg       0.50      0.38      0.43        89
weighted avg       1.00      0.76      0.87        89

--------------------------------------------------
Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.77      0.87        60
           1       1.00      0.93      0.96        29
           2       0.00      0.00      0.00         0

    accuracy                           0.82        89
   macro avg       0.67      0.57      0.61        89
weighted avg       1.00      0.82      0.90        89

--------------------------------------------------
Fold 3 Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00  

In [99]:
perform_cross_validation(model, data3_re, target3_re, 5, True)

Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.74      0.85        89
           2       0.00      0.00      0.00         0

    accuracy                           0.74        89
   macro avg       0.50      0.37      0.43        89
weighted avg       1.00      0.74      0.85        89

--------------------------------------------------
Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.75      0.86        60
           1       0.96      0.93      0.95        29
           2       0.00      0.00      0.00         0

    accuracy                           0.81        89
   macro avg       0.65      0.56      0.60        89
weighted avg       0.99      0.81      0.89        89

--------------------------------------------------
Fold 3 Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.81      0.90  

In [100]:
perform_cross_validation(model, data4_re, target4_re, 5, True)

Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.61      0.76        89
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0

    accuracy                           0.61        89
   macro avg       0.33      0.20      0.25        89
weighted avg       1.00      0.61      0.76        89

--------------------------------------------------
Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.62      0.74        60
           1       0.77      0.69      0.73        29
           2       0.00      0.00      0.00         0

    accuracy                           0.64        89
   macro avg       0.56      0.44      0.49        89
weighted avg       0.87      0.64      0.74        89

--------------------------------------------------
Fold 3 Classification Report:
              precision    recall  f1-score   

In [9]:
# Chamando função para cada uma das bases
print("========================== Data 1  =================================")
best_models1 = perform_model_tuning(data1, target1, models, param_grids, "data1")
print("========================== Data 2  =================================")
best_models2 = perform_model_tuning(data2, target2, models, param_grids,"data2")
print("========================== Data 3  =================================")
best_models3 = perform_model_tuning(data3, target3, models, param_grids, "data3")
print("========================== Data 4  =================================")
best_models4 = perform_model_tuning(data4, target4, models, param_grids, "data4")

Tuning do modelo Random Forest iniciado...
Acurácia do modelo Random Forest: 0.4556
Tuning do modelo SVM iniciado...
Acurácia do modelo SVM: 0.4222
Tuning do modelo MLP iniciado...
Acurácia do modelo MLP: 0.3889
Tuning do modelo XGBoost iniciado...
Acurácia do modelo XGBoost: 0.3889
Tuning do modelo Random Forest iniciado...
Acurácia do modelo Random Forest: 0.7444
Tuning do modelo SVM iniciado...
Acurácia do modelo SVM: 0.7333
Tuning do modelo MLP iniciado...
Acurácia do modelo MLP: 0.6889
Tuning do modelo XGBoost iniciado...
Acurácia do modelo XGBoost: 0.7111
Tuning do modelo Random Forest iniciado...
Acurácia do modelo Random Forest: 0.7222
Tuning do modelo SVM iniciado...
Acurácia do modelo SVM: 0.6778
Tuning do modelo MLP iniciado...
Acurácia do modelo MLP: 0.6889
Tuning do modelo XGBoost iniciado...
Acurácia do modelo XGBoost: 0.6667
Tuning do modelo Random Forest iniciado...
Acurácia do modelo Random Forest: 0.5444
Tuning do modelo SVM iniciado...
Acurácia do modelo SVM: 0.4556


In [101]:
# Pegar o melhor modelo de cada base, usando max, para em seguida aplicar SBFS e SFFS com cross validation
# para cada um dos modelos
#best_model1 = max(best_models1, key=best_models1.get)
modelo1 = best_models1['Random Forest']

#best_model2 = max(best_models2, key=best_models2.get)
modelo2 = best_models2['Random Forest']

#best_model3 = max(best_models3, key=best_models3.get)
modelo3 = best_models3['Random Forest']

#best_model4 = max(best_models4, key=best_models4.get)
modelo4 = best_models4['Random Forest']
print(modelo1)
print(modelo2)
print(modelo3)
print(modelo4)

RandomForestClassifier(max_features='auto', min_samples_split=10,
                       n_estimators=400)
RandomForestClassifier(max_depth=5, max_features='log2', min_samples_leaf=4,
                       min_samples_split=10)
RandomForestClassifier(max_depth=10, max_features='auto', min_samples_leaf=2,
                       min_samples_split=5, n_estimators=300)
RandomForestClassifier(max_depth=5, max_features='auto', min_samples_leaf=4,
                       n_estimators=300)


In [110]:
# Salvando modelo1 à 4 em pickle
import pickle
with open('modelo1.pkl', 'wb') as f:
    pickle.dump(modelo1, f)
with open('modelo2.pkl', 'wb') as f:
    pickle.dump(modelo2, f)
with open('modelo3.pkl', 'wb') as f:
    pickle.dump(modelo3, f)
with open('modelo4.pkl', 'wb') as f:
    pickle.dump(modelo4, f)

In [103]:
# Analisar cross validation para cada um dos modelos
print("========================== Data 1  =================================")
c1 = cross_val_score(modelo1, data1_re, target1_re , cv=5)
print(c1)
print(c1.mean())

perform_cross_validation(modelo1, data1_re, target1_re, 5, True)

[0.44444444 0.47777778 0.46067416 0.48314607 0.40449438]
0.45410736579275907
Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.43      0.60        89
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0

    accuracy                           0.43        89
   macro avg       0.33      0.14      0.20        89
weighted avg       1.00      0.43      0.60        89

--------------------------------------------------
Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.43      0.57        60
           1       0.60      0.62      0.61        29
           2       0.00      0.00      0.00         0

    accuracy                           0.49        89
   macro avg       0.47      0.35      0.39        89
weighted avg       0.74      0.49      0.58        89

--------------------------------------------------

In [104]:
print("========================== Data 2  =================================")
c2 = cross_val_score(modelo2, data2_re, target2_re, cv=5)
print(c2)
print(c2.mean())

perform_cross_validation(modelo2, data2_re, target2_re, 5, True)

[0.71111111 0.82222222 0.83146067 0.66292135 0.75280899]
0.7561048689138576
Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.80      0.89        89
           2       0.00      0.00      0.00         0

    accuracy                           0.80        89
   macro avg       0.50      0.40      0.44        89
weighted avg       1.00      0.80      0.89        89

--------------------------------------------------
Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.77      0.87        60
           1       1.00      0.97      0.98        29
           2       0.00      0.00      0.00         0

    accuracy                           0.83        89
   macro avg       0.67      0.58      0.62        89
weighted avg       1.00      0.83      0.91        89

--------------------------------------------------
Fold 3 Classification Report:
              precision 

In [106]:
print("========================== Data 3  =================================")
c3 = cross_val_score(modelo3, data3_re, target3_re, cv=5)
print(c3)
print(c3.mean())

perform_cross_validation(modelo3, data3_re, target3_re, 5, True)


[0.68888889 0.67777778 0.75280899 0.68539326 0.70786517]
0.7025468164794008
Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.78      0.87        89
           2       0.00      0.00      0.00         0

    accuracy                           0.78        89
   macro avg       0.50      0.39      0.44        89
weighted avg       1.00      0.78      0.87        89

--------------------------------------------------
Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.72      0.83        60
           1       1.00      0.90      0.95        29
           2       0.00      0.00      0.00         0

    accuracy                           0.78        89
   macro avg       0.67      0.54      0.59        89
weighted avg       1.00      0.78      0.87        89

--------------------------------------------------
Fold 3 Classification Report:
              precision 

In [107]:
print("========================== Data 4  =================================")
c4 = cross_val_score(modelo4, data4_re, target4_re, cv=5)
print(c4)
print(c4.mean())

perform_cross_validation(modelo4, data4_re, target4_re, 5, True)

[0.48888889 0.54444444 0.64044944 0.5505618  0.50561798]
0.5459925093632958
Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.67      0.81        89
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0

    accuracy                           0.67        89
   macro avg       0.33      0.22      0.27        89
weighted avg       1.00      0.67      0.81        89

--------------------------------------------------
Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.70      0.79        60
           1       0.82      0.79      0.81        29
           2       0.00      0.00      0.00         0

    accuracy                           0.73        89
   macro avg       0.58      0.50      0.53        89
weighted avg       0.88      0.73      0.80        89

--------------------------------------------------


In [12]:
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

# Função para aplicar SBFS e SFFS com cross validation e salvar tabela com resultado.
def perform_feature_selection(data, target, model, best_model, method, cv):
    # Criando o objeto de seleção de features
    if method == 'SBFS':
        sfs = SFS(model,
                  k_features="best",
                  forward=False,
                  floating=True,
                  verbose=2,
                  scoring='accuracy',
                  cv=cv,
                  n_jobs=-1)
    elif method == 'SFFS':
        sfs = SFS(model,
                  k_features="best",
                  forward=True,
                  floating=True,
                  verbose=2,
                  scoring='accuracy',
                  cv=cv,
                  n_jobs=-1)
    # Aplicando o método de seleção de features
    sfs = sfs.fit(data, target)
    # Salvando o resultado em um dataframe
    df = pd.DataFrame.from_dict(sfs.get_metric_dict()).T
    # Salvando o resultado em um arquivo csv, adicionando 1 caso já exsita um arquivo com o mesmo nome
    if os.path.exists(f"results/{method}_{best_model}.csv"):
        i = 1
        while os.path.exists(f"results/{method}_{best_model}_{i}.csv"):
            i += 1
        file_name = f"results/{method}_{best_model}_{i}.csv"
    else:
        file_name = f"results/{method}_{best_model}.csv"

    # Definindo nome do arquivo para salvar as features selecionadas, adicionando 1 caso já exsita um arquivo com o mesmo nome
    if os.path.exists(f"results/{method}_{best_model}.txt"):
        i = 1
        while os.path.exists(f"results/{method}_{best_model}_{i}.txt"):
            i += 1
        file_name = f"results/{method}_{best_model}_{i}.txt"
    else:
        file_name = f"results/{method}_{best_model}.txt"

    # Retornando as features selecionadas e o score
    return sfs.k_feature_names_, sfs.k_score_

In [13]:
# Chamando a função de feature selection para cada um dos "modelos*"
print("========================== Data 1  =================================")
slt_sbfs_features1, score1_sbfs = perform_feature_selection(data1, target1, modelo1, "data1", 'SBFS', 5)
slt_sffs_features1, score1_sffs = perform_feature_selection(data1, target1, modelo1, "data1", 'SFFS', 5)
print("========================== Data 2  =================================")
slt_sbfs_features2, score2_sbfs = perform_feature_selection(data2, target2, modelo2, "data2", 'SBFS', 5)
slt_sffs_features2, score2_sffs = perform_feature_selection(data2, target2, modelo2,"data2", 'SFFS', 5)
print("========================== Data 3  =================================")
slt_sbfs_features3, score3_sbfs = perform_feature_selection(data3, target3, modelo3, "data3", 'SBFS', 5)
slt_sffs_features3, score3_sffs = perform_feature_selection(data3, target3, modelo3, "data3", 'SFFS', 5)
print("========================== Data 4  =================================")
slt_sbfs_features4, score4_sbfs = perform_feature_selection(data4, target4, modelo4,"data4", 'SBFS', 5)
slt_sffs_features4, score4_sffs = perform_feature_selection(data4, target4, modelo4, "data4", 'SFFS', 5)



[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done  54 out of  54 | elapsed:    6.1s finished

[2023-06-22 03:14:26] Features: 53/1 -- score: 0.6582594417077174[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done  53 out of  53 | elapsed:    2.2s finished

[2023-06-22 03:14:28] Features: 52/1 -- score: 0.6582594417077174[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done  52 out of  52 | elapsed:    2.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.3s finished

[2023-06-22 03:14:31] Features: 51/1 -- score: 0.6582594417077174[Parallel(n_jobs=-1)]: Using



[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done  84 out of  84 | elapsed:    3.8s finished

[2023-06-22 03:17:45] Features: 83/1 -- score: 0.7603612479474549[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done  83 out of  83 | elapsed:    2.9s finished

[2023-06-22 03:17:48] Features: 82/1 -- score: 0.7764203612479476[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done  82 out of  82 | elapsed:    3.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.1s finished

[2023-06-22 03:17:51] Features: 81/1 -- score: 0.7775697865353038[Parallel(n_jobs=-1)]: Using



[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done  58 out of  58 | elapsed:    2.3s finished

[2023-06-22 03:38:11] Features: 57/1 -- score: 0.7569129720853859[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done  57 out of  57 | elapsed:    1.8s finished

[2023-06-22 03:38:13] Features: 56/1 -- score: 0.7603481116584565[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done  56 out of  56 | elapsed:    1.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished

[2023-06-22 03:38:15] Features: 55/1 -- score: 0.7614975369458128[Parallel(n_jobs=-1)]: Using



[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   50.4s
[Parallel(n_jobs=-1)]: Done  55 out of  55 | elapsed:  1.5min finished

[2023-06-22 03:46:28] Features: 54/1 -- score: 0.6846568144499179[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   50.8s
[Parallel(n_jobs=-1)]: Done  54 out of  54 | elapsed:  1.5min finished

[2023-06-22 03:47:56] Features: 53/1 -- score: 0.6869490968801314[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   50.2s
[Parallel(n_jobs=-1)]: Done  53 out of  53 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    9.8s finished

[2023-06-22 03:49:32] Features: 52/1 -- score: 0.6857996715927751[Parallel(n_jobs=-1)]: Using

In [109]:
# Criando um arquivo txt com a feature selecionadas
with open(f"results/SBFS_data1.txt", "w") as f:
    f.write(str(slt_sbfs_features1))
with open(f"results/SFFS_data1.txt", "w") as f:
    f.write(str(slt_sffs_features1))
with open(f"results/SBFS_data2.txt", "w") as f:
    f.write(str(slt_sbfs_features2))
with open(f"results/SFFS_data2.txt", "w") as f:
    f.write(str(slt_sffs_features2))
with open(f"results/SBFS_data3.txt", "w") as f:
    f.write(str(slt_sbfs_features3))
with open(f"results/SFFS_data3.txt", "w") as f:
    f.write(str(slt_sffs_features3))
with open(f"results/SBFS_data4.txt", "w") as f:
    f.write(str(slt_sbfs_features4))
with open(f"results/SFFS_data4.txt", "w") as f:
    f.write(str(slt_sffs_features4))


In [108]:
# Testando os modelos com as features selecionadas
print("========================== Data 1  - SBFS =================================")
perform_cross_validation(modelo1, data1_re.loc[:,slt_sbfs_features1], target1_re, 5, False)

print("========================== Data 1  - SFFS =================================")
perform_cross_validation(modelo1, data1_re.loc[:,slt_sffs_features1], target1_re, 5, False)

print("========================== Data 2  - SBFS =================================")
perform_cross_validation(modelo2, data2_re.loc[:,slt_sbfs_features2], target2_re, 5, False)

print("========================== Data 2  - SFFS =================================")
perform_cross_validation(modelo2, data2_re.loc[:,slt_sffs_features2], target2_re, 5, False)

print("========================== Data 3  - SBFS =================================")
perform_cross_validation(modelo3, data3_re.loc[:,slt_sbfs_features3], target3_re, 5, False)

print("========================== Data 3  - SFFS =================================")
perform_cross_validation(modelo3, data3_re.loc[:,slt_sffs_features3], target3_re, 5, False)

print("========================== Data 4  - SBFS =================================")
perform_cross_validation(modelo4, data4_re.loc[:,slt_sbfs_features4], target4_re, 5, False)

print("========================== Data 4  - SFFS =================================")
perform_cross_validation(modelo4, data4_re.loc[:,slt_sffs_features4], target4_re, 5, False)


Average Classification Report:
              precision    recall  f1-score   support

           0       0.47      0.44      0.45       149
           1       0.53      0.58      0.55       149
           2       0.34      0.33      0.34       149

    accuracy                           0.45       447
   macro avg       0.45      0.45      0.45       447
weighted avg       0.45      0.45      0.45       447

--------------------------------------------------
Accuracy: 0.45
Average Classification Report:
              precision    recall  f1-score   support

           0       0.40      0.53      0.46       149
           1       0.38      0.64      0.48       149
           2       0.00      0.00      0.00       149

    accuracy                           0.39       447
   macro avg       0.26      0.39      0.31       447
weighted avg       0.26      0.39      0.31       447

--------------------------------------------------
Accuracy: 0.39
Average Classification Report:
             

In [10]:

#print("========================== Data 1  =================================")
#X_train, X_test, y_train, y_test = train_test_split(data1, target1, test_size=0.2)
#tpot = TPOTClassifier(verbosity=2, config_dict='TPOT light')
#tpot.fit(X_train, y_train)
#accuracy = tpot.score(X_test, y_test)
#print(f"Acurácia do melhor modelo encontrado: {accuracy}")
#tpot.export('tpot_best_model1.py')
#print("========================== Data 2  =================================")
#X_train, X_test, y_train, y_test = train_test_split(data2, target2, test_size=0.2)
#tpot2 = TPOTClassifier(verbosity=2, config_dict='TPOT light')
#tpot2.fit(X_train, y_train)
#accuracy = tpot2.score(X_test, y_test)
#print(f"Acurácia do melhor modelo encontrado: {accuracy}")
##tpot2.export('tpot_best_model2.py')
#print("========================== Data 3  =================================")
#X_train, X_test, y_train, y_test = train_test_split(data3, target3, test_size=0.2)
#tpot3 = TPOTClassifier(verbosity=2, config_dict='TPOT light')
#tpot3.fit(X_train, y_train)
#accuracy = tpot3.score(X_test, y_test)
#print(f"Acurácia do melhor modelo encontrado: {accuracy}")
#tpot3.export('tpot_best_model3.py')
#print("========================== Data 4  =================================")
#X_train, X_test, y_train, y_test = train_test_split(data4, target4, test_size=0.2)
#$tpot4 = TPOTClassifier(verbosity=2,config_dict='TPOT light')
#tpot4.fit(X_train, y_train)
#accuracy = tpot4.score(X_test, y_test)
#print(f"Acurácia do melhor modelo encontrado: {accuracy}")
#tpot4.export('tpot_best_model4.py')