In [1]:
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
import pandas as pd
import os

# Carregar base de dados
diretorio_atual = os.getcwd()

caminho_dataset = os.path.join(diretorio_atual, 'datasets', 'parkinsons.data')

df = pd.read_csv(caminho_dataset)

X = df.drop(['status', 'name', 'APQ', 'D2',
            'Fhi(Hz)', 'Flo(Hz)', 'Fo(Hz)',
             'PPQ', 'RAP', 'spread1', 'spread2'], axis=1)

y = df['status']

X_treino, X_teste, y_treino, y_teste = train_test_split(X, y, test_size=0.2)



In [2]:
from sklearn.metrics import roc_auc_score

def treinar_modelo(params):
    learning_rate = params[0]
    min_child_weight = params[1]
    max_depth = params[2]
    colsample_bytree = params[3]
    gamma = params[4]
    scale_pos_weight = params[5]
    
    print(params, '\n')

    modelo = XGBClassifier(learning_rate=learning_rate, min_child_weight=min_child_weight,
                            max_depth=max_depth, colsample_bytree=colsample_bytree,
                            gamma=gamma, scale_pos_weight=scale_pos_weight, n_estimators=50) # número de arvores é definido como fixo
    modelo.fit(X_treino, y_treino)

    proba = modelo.predict_proba(X_teste)[:, 1]

    return -1 * roc_auc_score(y_teste, proba) # multiplicado por -1 porque é preciso minimizar a negativa do auc não o próprio auc


space = [(1e-3, 1, 'log-uniform'), # learning_rate, log-uniform dá mais importância para números menores 
         (1, 10), # min_child_weight
         (3, 10), # max_depth
         (0.5, 1.0), # colsample_bytree
         (0, 5), # gamma
         (1, 10)] # scale_pos_weight

# resultado = dummy_minimize(treinar_modelo, space, random_state=1, verbose=1, n_calls=30)
# resultado.x

Otimização Bayesiana

In [3]:
from skopt import gp_minimize

resultados_gp = gp_minimize(treinar_modelo, space, verbose=1, n_calls=30, n_random_starts=10)

Iteration No: 1 started. Evaluating function at random point.
[0.014261599458822608, 4, 3, 0.5406236543582036, 3, 10] 

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.0603
Function value obtained: -0.8638
Current minimum: -0.8638
Iteration No: 2 started. Evaluating function at random point.
[0.051602506627090636, 3, 7, 0.6162861005899127, 5, 1] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.0194
Function value obtained: -0.8948
Current minimum: -0.8948
Iteration No: 3 started. Evaluating function at random point.
[0.01315628220653826, 2, 4, 0.8287369912080995, 4, 9] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0666
Function value obtained: -0.8276
Current minimum: -0.8948
Iteration No: 4 started. Evaluating function at random point.
[0.3180967559385836, 10, 7, 0.7922045417078025, 0, 3] 

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.0327
Function value obtained: -0.9138
Current minimum: -

Exploration Exploitation Tradeoff

In [4]:
resultados_gp.x


[0.4277922868401173, 2, 9, 0.7920368639829974, 4, 4]