In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

train = pd.read_csv("../Data/train.csv")

train = train.drop("id", axis = 1)

defect_counts = train[["Pastry","Z_Scratch","K_Scatch","Stains","Dirtiness","Bumps","Other_Faults"]].sum(axis=1)
train = train[defect_counts==1]

train['target'] = train[["Pastry","Z_Scratch","K_Scatch","Stains","Dirtiness","Bumps","Other_Faults"]].apply(lambda row: row.idxmax(), axis=1)
train = train.drop(["Pastry","Z_Scratch","K_Scatch","Stains","Dirtiness","Bumps","Other_Faults"],axis = 1)

label_map = {'Pastry': 0, 'Z_Scratch': 1, 'K_Scatch': 2, 'Stains': 3, 'Dirtiness': 4, 'Bumps': 5, 'Other_Faults': 6}
train['target'] = train['target'].map(label_map)

In [19]:
train['TypeOfSteel'] = train.apply(lambda row: 0 if row['TypeOfSteel_A300'] == 1 else (1 if row['TypeOfSteel_A400'] == 1 else None), axis=1)
train = train.drop(["TypeOfSteel_A300", "TypeOfSteel_A400"], axis = 1)
train.dropna(inplace=True)

In [25]:
import optuna
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score, make_scorer

X = train.drop("target", axis=1)
y = train["target"]

multiroc = make_scorer(roc_auc_score, multi_class='ovo',needs_proba=True)

def objective(trial):
    params = {
        'grow_policy': trial.suggest_categorical('grow_policy', ["depthwise", "lossguide"]),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0, log=True),
        'gamma' : trial.suggest_float('gamma', 1e-9, 0.5),
        'subsample': trial.suggest_float('subsample', 0.3, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'max_depth': trial.suggest_int('max_depth', 0, 12),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-9, 100.0, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-9, 100.0, log=True),
        
    }

    cv_scores = cross_val_score(XGBClassifier(**params), X, y, cv=5, scoring=multiroc)

    
    return cv_scores.mean()






In [26]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

[I 2024-03-04 14:46:40,628] A new study created in memory with name: no-name-300aa884-4554-4796-9a62-afb7b95cb4c5
[I 2024-03-04 14:50:18,881] Trial 0 finished with value: 0.8973402045231953 and parameters: {'grow_policy': 'lossguide', 'n_estimators': 803, 'learning_rate': 0.020141107046487246, 'gamma': 0.1767989045415461, 'subsample': 0.6771220571136365, 'colsample_bytree': 0.5477455763292887, 'max_depth': 9, 'min_child_weight': 6, 'reg_lambda': 0.010026470993996046, 'reg_alpha': 9.889751727227905e-07}. Best is trial 0 with value: 0.8973402045231953.
[I 2024-03-04 14:50:41,145] Trial 1 finished with value: 0.885115145971562 and parameters: {'grow_policy': 'lossguide', 'n_estimators': 347, 'learning_rate': 0.046909211849456046, 'gamma': 0.06033733534814755, 'subsample': 0.7360667937996177, 'colsample_bytree': 0.820626329243197, 'max_depth': 2, 'min_child_weight': 6, 'reg_lambda': 1.2813872103948067e-05, 'reg_alpha': 63.74581051275238}. Best is trial 0 with value: 0.8973402045231953.
[I 

KeyboardInterrupt: 

In [27]:
study.stop()

RuntimeError: `Study.stop` is supposed to be invoked inside an objective function or a callback.