# XGBoost + Optuna

In [None]:
### if you want to install optuna just uncomment the code bellow
#!pip install optuna 

In [2]:
import pandas as pd
import numpy as np


## optuna modules
import optuna
import functools

## import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score,accuracy_score,roc_auc_score,make_scorer
from sklearn.model_selection import StratifiedKFold, cross_val_score, KFold,TimeSeriesSplit,train_test_split
import xgboost as xgb

### Dataset

In [3]:
df = pd.read_csv("data.csv")
X = df.drop(columns=["id", "Unnamed: 32", "diagnosis"])
y = df["diagnosis"].map({'B': 0, 'M': 1})
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=8)

In [4]:
std= StandardScaler()

In [5]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform (X_test)

In [6]:
X_train=pd.DataFrame(X_train)
X_test=pd.DataFrame(X_test)

### Hypertune

In [8]:
def opt(X_train, y_train, X_test, y_test, trial):
    #param_list
    n_estimators = trial.suggest_int('n_estimators', 0, 1000)
    max_depth = trial.suggest_int('max_depth', 1, 20)
    min_child_weight = trial.suggest_int('min_child_weight', 1, 20)
    #learning_rate = trial.suggest_discrete_uniform('learning_rate', 0.01, 0.1, 0.01)
    scale_pos_weight = trial.suggest_int('scale_pos_weight', 1, 100)
    subsample = trial.suggest_discrete_uniform('subsample', 0.5, 0.9, 0.1)
    colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 0.9, 0.1)

    xgboost_tuna = xgb.XGBClassifier(
        random_state=42, 
        tree_method='gpu_hist',
        n_estimators = n_estimators,
        max_depth = max_depth,
        min_child_weight = min_child_weight,
        #learning_rate = learning_rate,
        scale_pos_weight = scale_pos_weight,
        subsample = subsample,
        colsample_bytree = colsample_bytree,
    )
    xgboost_tuna.fit(X_train, y_train)
    tuna_pred_test = xgboost_tuna.predict(X_test)
    
    return (1.0 - (accuracy_score(y_test, tuna_pred_test)))

In [13]:
study = optuna.create_study()
study.optimize(functools.partial(opt, X_train, y_train, X_test, y_test), n_trials=100)

In [14]:
clf = xgb.XGBClassifier(tree_method='gpu_hist',**study.best_params)
clf.fit(X_train, y_train)
y_pred_rf = clf.predict_proba(X_test)[:,1]
# Getting AUC
auc_rf = roc_auc_score(y_test, y_pred_rf)

In [12]:
# Print results
print(f"The AUC of XGBOOST + Optuna is {auc_rf:.5f}")

The AUC of XGBOOST + Optuna is 0.99567
