In [25]:
import pandas as pd

df = pd.read_csv('creditcard.csv')

X = df.drop('Class', axis=1).copy()

y = df['Class'].copy()



In [26]:
from sklearn.model_selection import train_test_split

X_treino, X_teste, y_treino, y_teste = train_test_split(X, y, test_size=0.2, stratify=y)

y_treino.value_counts()

Class
0    227451
1       394
Name: count, dtype: int64

In [27]:
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score
from imblearn.pipeline import Pipeline as imbPipeline
from imblearn.over_sampling import SMOTE

def treinar_modelo(params):
    learning_rate = params[0]
    min_child_weight = params[1]
    max_depth = params[2]
    colsample_bytree = params[3]
    gamma = params[4]
    scale_pos_weight = params[5]
    
    print(params, '\n')


    pipe = imbPipeline(steps=[('SMOTE', SMOTE()),
                              ('StandardScaler', StandardScaler()),
                              ('XGBClassifier', XGBClassifier(learning_rate=learning_rate, min_child_weight=min_child_weight,
                                                              max_depth=max_depth, colsample_bytree=colsample_bytree,
                                                              gamma=gamma, scale_pos_weight=scale_pos_weight, n_estimators=50))])


    pipe.fit(X_treino, y_treino)

    proba = pipe.predict_proba(X_teste)[:, 1]

    return -1 * roc_auc_score(y_teste, proba) # multiplicado por -1 porque é preciso minimizar a negativa do auc não o próprio auc


space = [(1e-3, 1, 'log-uniform'), # learning_rate, log-uniform dá mais importância para números menores 
         (1, 10), # min_child_weight
         (3, 10), # max_depth
         (0.5, 1.0), # colsample_bytree
         (0, 5), # gamma
         (1, 10)] # scale_pos_weight

In [28]:
from skopt import gp_minimize

resultados_gp = gp_minimize(treinar_modelo, space, verbose=1, n_calls=30, n_random_starts=10, acq_func='gp_hedge')

Iteration No: 1 started. Evaluating function at random point.
[0.004673447687442937, 7, 4, 0.6882157072531008, 4, 2] 

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 2.3895
Function value obtained: -0.9772
Current minimum: -0.9772
Iteration No: 2 started. Evaluating function at random point.
[0.005700415612265633, 7, 8, 0.6911343750703985, 3, 4] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 2.5284
Function value obtained: -0.9833
Current minimum: -0.9833
Iteration No: 3 started. Evaluating function at random point.
[0.0011950516558566905, 7, 8, 0.8524776192425616, 1, 10] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 2.3926
Function value obtained: -0.9751
Current minimum: -0.9833
Iteration No: 4 started. Evaluating function at random point.
[0.04822242215552746, 5, 7, 0.7712175592293227, 0, 5] 

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 2.2543
Function value obtained: -0.9712
Current minimum:

In [30]:
resultados_gp.x


[0.009561472363794643, 10, 10, 0.5, 0, 1]