In [None]:
def objective(trial):
    """Optuna objective: train one XGBoost candidate and score it on the validation split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters listed below.

    Returns
    -------
    float
        Macro-averaged F1 score of the candidate's predictions on ``X_val``.

    Notes
    -----
    Reads ``X_train``, ``y_train``, ``X_val``, ``y_val`` and ``RANDOM_SEED``
    from the notebook's global namespace.
    """
    params = {
        'max_depth': trial.suggest_int('max_depth', 6, 10),
        # Only `learning_rate` is searched. XGBoost treats `eta` as an alias of
        # the same parameter, so sampling both made one of the two values
        # redundant; the previous `eta` range (0.01-0.1) lies inside this one.
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1e-1, log=True),
        'n_estimators': trial.suggest_categorical('n_estimators', [50, 100, 150]),
        # Use the scikit-learn wrapper's own names (`reg_alpha` / `reg_lambda`)
        # rather than the booster aliases `alpha` / `lambda`, so the values are
        # bound to the constructor arguments instead of travelling via **kwargs.
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 6),
        # `suggest_float` replaces the deprecated `suggest_uniform`.
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 0.9),
        # Fixed (not sampled), so it never appears in `trial.params`.
        'random_state': RANDOM_SEED,
    }

    model = XGBClassifier(**params)
    # The validation split doubles as the early-stopping set: patience is 100
    # rounds against at most 150 estimators.
    # NOTE(review): newer XGBoost releases expect `early_stopping_rounds` on the
    # constructor rather than on fit() - confirm against the installed version.
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        early_stopping_rounds=100,
        verbose=False,
    )
    yhat_xgb = model.predict(X_val)

    return f1_score(y_val, yhat_xgb, average='macro')


In [None]:
# Seed the sampler so that re-running the notebook proposes the same sequence
# of trials. TPESampler is Optuna's default sampler, so the search algorithm is
# unchanged; RANDOM_SEED is the notebook-level constant also used by objective().
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=RANDOM_SEED),
)
study.optimize(objective, n_trials=100)

# Best trial = highest objective value, because direction='maximize'.
trial = study.best_trial

print('Best hyperparam: {}'.format(trial.params))

In [None]:
# Report the outcome of the search: the best objective value reached and the
# hyperparameters of the trial that produced it.
xgb_best_score = study.best_value
xgb_best_params = study.best_params
for report_line in (
    f"Best score: {xgb_best_score}\n",
    f"Optimized parameters: {xgb_best_params}\n",
):
    print(report_line)

In [None]:
# Rebuild the classifier from the tuned hyperparameters. `study.best_params`
# only contains sampled values, so the fixed seed used while tuning has to be
# re-applied here; merging it into the dict keeps the call valid even if a
# 'random_state' key is already present.
model_xgb_tuned = XGBClassifier(**{**xgb_best_params, 'random_state': RANDOM_SEED})
# CVS is defined elsewhere in the notebook; from this call it returns
# (probabilities, predictions, scores) for the model labelled 'xgb_tuned'
# - presumably via cross-validation on the full X, y; confirm against its definition.
y_prob_xgb_tuned, y_pred_xgb_tuned, scores_xgb_tuned = CVS(model_xgb_tuned, X, y, 'xgb_tuned')

In [None]:
# Plot the objective value of every trial in `study`; the figure is shown
# because this call is the cell's last expression.
optuna.visualization.plot_optimization_history(study)

In [None]:
# Slice plot: objective value against each sampled hyperparameter of `study`;
# shown because this call is the cell's last expression.
optuna.visualization.plot_slice(study)

In [None]:
# Validation curve over `max_depth`: refit XGBoost at each depth with the other
# hyperparameters held fixed, and compare training vs. validation F1.
#
# NOTE(review): this rebinds `xgb_best_params`, replacing the values taken from
# the Optuna study; cells that expect the study result must run before this one.
# NOTE(review): 'learning_rate' and 'eta' are aliases of the same XGBoost
# parameter but carry different values here (0.20 vs 0.09) - decide which one
# is intended and drop the other.
xgb_best_params = {
    'learning_rate': 0.20,
    'n_estimators': 100,
    'reg_alpha': 2.12e-08,
    'reg_lambda': 4.85e-08,
    'min_child_weight': 2,
    'eta': 0.09,
    'colsample_bytree': 0.70,
}

param_range = np.arange(3, 20, 1)

train_scores = []
test_scores = []
for depth in param_range:
    xgb = XGBClassifier(
        **xgb_best_params,
        max_depth=int(depth),       # plain int rather than a numpy integer
        random_state=RANDOM_SEED,   # same seed as the tuning runs
    )
    xgb.fit(X_train, y_train)
    # Macro averaging matches the metric optimised in objective(); the default
    # ('binary') scores a single class only and rejects multiclass targets.
    train_scores.append(f1_score(y_train, xgb.predict(X_train), average='macro'))
    test_scores.append(f1_score(y_val, xgb.predict(X_val), average='macro'))

fig, ax = plt.subplots(figsize=(7, 7))
ax.plot(param_range, train_scores, label="Training", color="black")
ax.plot(param_range, test_scores, label="Validation", color="dimgrey")

ax.set_title("Comparison Train x Validation - XGBoost")
ax.set_xlabel("Max Depth")
ax.set_ylabel("F1 Score")
fig.tight_layout()
ax.legend(loc="best")
plt.show()

In [None]:
# Validation curve over `n_estimators`: refit XGBoost at each ensemble size with
# the other hyperparameters held fixed, and compare training vs. validation F1.
#
# NOTE(review): this rebinds `xgb_best_params`, replacing whatever an earlier
# cell stored under that name; cells that expect those values must run first.
# NOTE(review): 'learning_rate' and 'eta' are aliases of the same XGBoost
# parameter but carry different values here (0.20 vs 0.09) - decide which one
# is intended and drop the other.
xgb_best_params = {
    'max_depth': 10,
    'learning_rate': 0.20,
    'reg_alpha': 2.12e-08,
    'reg_lambda': 4.85e-08,
    'min_child_weight': 2,
    'eta': 0.09,
    'colsample_bytree': 0.70,
}

param_range = np.arange(10, 300, 10)

train_scores = []
test_scores = []
for n_trees in param_range:
    xgb = XGBClassifier(
        **xgb_best_params,
        n_estimators=int(n_trees),  # plain int rather than a numpy integer
        random_state=RANDOM_SEED,   # same seed as the tuning runs
    )
    xgb.fit(X_train, y_train)
    # Macro averaging matches the metric optimised in objective(); the default
    # ('binary') scores a single class only and rejects multiclass targets.
    train_scores.append(f1_score(y_train, xgb.predict(X_train), average='macro'))
    test_scores.append(f1_score(y_val, xgb.predict(X_val), average='macro'))

fig, ax = plt.subplots(figsize=(7, 7))
ax.plot(param_range, train_scores, label="Training", color="black")
ax.plot(param_range, test_scores, label="Validation", color="dimgrey")

ax.set_title("Comparison Train x Validation - XGBoost")
ax.set_xlabel("Number of Estimators")
ax.set_ylabel("F1 Score")
fig.tight_layout()
ax.legend(loc="best")
plt.show()