In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from data import get_data
from sklearn.model_selection import cross_val_score
import optuna
import numpy as np
import pickle

In [None]:
# Read the full dataset (features plus the 'target' column) from disk.
data = pd.read_csv(
    'data.csv',
)

In [None]:
# Separate the label column from the feature matrix.
y = data['target']
X = data.drop(columns='target')

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
def objective(trial):
    """Optuna objective: mean 5-fold CV accuracy of an XGBoost classifier.

    Hyperparameters are sampled from ``trial`` and evaluated by
    cross-validation on the training split only (``X_train`` / ``y_train``),
    so the hold-out test split is never used for model selection.

    Args:
        trial: The Optuna trial object used to sample hyperparameters.

    Returns:
        The mean accuracy over the 5 cross-validation folds (higher is better).
    """
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        # NOTE(review): 'gpu_hist' is deprecated in XGBoost >= 2.0 in favour of
        # tree_method='hist' with device='cuda' -- confirm the installed version.
        'tree_method': 'gpu_hist',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000, step=50),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        # The scikit-learn wrapper names the L2/L1 penalties reg_lambda/reg_alpha.
        # The Optuna parameter names stay 'lambda'/'alpha' so the keys of
        # study.best_params are unchanged.
        'reg_lambda': trial.suggest_float('lambda', 1e-8, 10.0, log=True),  # L2
        'reg_alpha': trial.suggest_float('alpha', 1e-8, 10.0, log=True),    # L1
    }

    # XGBClassifier must be reached through the `xgb` module alias; the bare
    # name is never imported in this notebook.
    # NOTE(review): use_label_encoder is deprecated in recent XGBoost releases;
    # kept for compatibility with older versions -- confirm whether still needed.
    xgb_model = xgb.XGBClassifier(**params, use_label_encoder=False)

    # cross_val_score clones and refits the estimator on every fold, so a
    # separate fit() beforehand is unnecessary. Scoring on the training split
    # keeps the test set untouched for the final, unbiased evaluation.
    scores = cross_val_score(xgb_model, X_train, y_train, cv=5, scoring='accuracy')

    return np.mean(scores)


In [None]:
# Create the Optuna study; the objective value is to be maximized.
study = optuna.create_study(
    direction='maximize',
)