In [7]:
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
import pandas as pd
import os

# Carregar base de dados
diretorio_atual = os.getcwd()

caminho_dataset = os.path.join(diretorio_atual, 'datasets', 'parkinsons.data')

df = pd.read_csv(caminho_dataset)

X = df.drop(['status', 'name', 'APQ', 'D2',
            'Fhi(Hz)', 'Flo(Hz)', 'Fo(Hz)',
             'PPQ', 'RAP', 'spread1', 'spread2'], axis=1)

y = df['status']

X_treino, X_teste, y_treino, y_teste = train_test_split(X, y, test_size=0.2)



In [8]:
from sklearn.metrics import roc_auc_score

def treinar_modelo(params):
    learning_rate = params[0]
    min_child_weight = params[1]
    max_depth = params[2]
    colsample_bytree = params[3]
    gamma = params[4]
    scale_pos_weight = params[5]
    
    print(params, '\n')

    modelo = XGBClassifier(learning_rate=learning_rate, min_child_weight=min_child_weight,
                            max_depth=max_depth, colsample_bytree=colsample_bytree,
                            gamma=gamma, scale_pos_weight=scale_pos_weight, n_estimators=50) # número de arvores é definido como fixo
    modelo.fit(X_treino, y_treino)

    proba = modelo.predict_proba(X_teste)[:, 1]

    return -1 * roc_auc_score(y_teste, proba) # multiplicado por -1 porque é preciso minimizar a negativa do auc não o próprio auc


space = [(1e-3, 1, 'log-uniform'), # learning_rate, log-uniform dá mais importância para números menores 
         (1, 10), # min_child_weight
         (3, 10), # max_depth
         (0.5, 1.0), # colsample_bytree
         (0, 5), # gamma
         (1, 10)] # scale_pos_weight

# resultado = dummy_minimize(treinar_modelo, space, random_state=1, verbose=1, n_calls=30)
# resultado.x

Otimização Bayesiana

In [9]:
from skopt import gp_minimize

resultados_gp = gp_minimize(treinar_modelo, space, verbose=1, n_calls=30, n_random_starts=10)

Iteration No: 1 started. Evaluating function at random point.
[0.0022530837017431704, 9, 9, 0.7018629805266077, 2, 5] 

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.0154
Function value obtained: -0.8763
Current minimum: -0.8763
Iteration No: 2 started. Evaluating function at random point.
[0.009019783049207255, 4, 4, 0.6218783964815431, 3, 7] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.0222
Function value obtained: -0.8636
Current minimum: -0.8763
Iteration No: 3 started. Evaluating function at random point.
[0.00102070257511997, 2, 4, 0.7848768656174478, 2, 3] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0337
Function value obtained: -0.8561
Current minimum: -0.8763
Iteration No: 4 started. Evaluating function at random point.
[0.0011641108497835557, 3, 8, 0.6574254627824498, 4, 3] 

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.0275
Function value obtained: -0.8535
Current minimum:

Exploration Exploitation Tradeoff

In [10]:
resultados_gp.x


[0.41993086033563604, 3, 5, 0.5666598641230399, 0, 4]