In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

train = pd.read_csv("../Data/train.csv")

train = train.drop("id", axis = 1)

defect_counts = train[["Pastry","Z_Scratch","K_Scatch","Stains","Dirtiness","Bumps","Other_Faults"]].sum(axis=1)
train = train[defect_counts==1]

train['target'] = train[["Pastry","Z_Scratch","K_Scatch","Stains","Dirtiness","Bumps","Other_Faults"]].apply(lambda row: row.idxmax(), axis=1)
train = train.drop(["Pastry","Z_Scratch","K_Scatch","Stains","Dirtiness","Bumps","Other_Faults"],axis = 1)

label_map = {'Pastry': 0, 'Z_Scratch': 1, 'K_Scatch': 2, 'Stains': 3, 'Dirtiness': 4, 'Bumps': 5, 'Other_Faults': 6}
train['target'] = train['target'].map(label_map)

In [7]:
train['TypeOfSteel'] = train.apply(lambda row: 0 if row['TypeOfSteel_A300'] == 1 else (1 if row['TypeOfSteel_A400'] == 1 else None), axis=1)
train = train.drop(["TypeOfSteel_A300", "TypeOfSteel_A400"], axis = 1)
train.dropna(inplace=True)

In [18]:
import optuna
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score, make_scorer

X = train.drop("target", axis=1)
y = train["target"]

multiroc = make_scorer(roc_auc_score, multi_class='ovo',needs_proba=True)

def objective(trial):
    params = {
        'grow_policy': trial.suggest_categorical('grow_policy', ["depthwise", "lossguide"]),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0, log=True),
        'gamma' : trial.suggest_float('gamma', 1e-9, 0.5),
        'subsample': trial.suggest_float('subsample', 0.3, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'max_depth': trial.suggest_int('max_depth', 0, 12),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-9, 100.0, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-9, 100.0, log=True),
        
    }

    cv_scores = cross_val_score(XGBClassifier(**params), X, y, cv=5, scoring=multiroc)

    
    return cv_scores.mean()






In [19]:
pruner = optuna.pruners.MedianPruner()

study = optuna.create_study(direction='maximize', pruner=pruner)

study.optimize(objective, n_trials=1000, n_jobs = -1)

[I 2024-03-05 07:07:38,212] A new study created in memory with name: no-name-36feed9f-2b7d-4e90-9569-cd3fbc66d9f8
[I 2024-03-05 07:08:32,306] Trial 4 finished with value: 0.8928897969927039 and parameters: {'grow_policy': 'depthwise', 'n_estimators': 266, 'learning_rate': 0.873915845293492, 'gamma': 0.06486931067946723, 'subsample': 0.4995465222174628, 'colsample_bytree': 0.9460949931200888, 'max_depth': 1, 'min_child_weight': 2, 'reg_lambda': 5.2694647940069425e-09, 'reg_alpha': 0.0176646550847285}. Best is trial 4 with value: 0.8928897969927039.
[I 2024-03-05 07:09:40,268] Trial 5 finished with value: 0.9033839407973027 and parameters: {'grow_policy': 'depthwise', 'n_estimators': 407, 'learning_rate': 0.09457108290086648, 'gamma': 0.46144648366167146, 'subsample': 0.46086670465362456, 'colsample_bytree': 0.7290192267965077, 'max_depth': 2, 'min_child_weight': 6, 'reg_lambda': 4.1042016894760335e-05, 'reg_alpha': 0.00013596068499991234}. Best is trial 5 with value: 0.9033839407973027.

In [16]:
import xgboost as xgb
import optuna
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score, make_scorer


X = train.drop("target", axis=1)
y = train["target"]

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)


multiroc = make_scorer(roc_auc_score, multi_class='ovo',needs_proba=True)

def objective(trial):
    params = {
        'grow_policy': trial.suggest_categorical('grow_policy', ["depthwise", "lossguide"]),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0, log=True),
        'gamma' : trial.suggest_float('gamma', 1e-9, 0.5),
        'subsample': trial.suggest_float('subsample', 0.3, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'max_depth': trial.suggest_int('max_depth', 0, 12),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-9, 100.0, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-9, 100.0, log=True),
        
    }

    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

    # Convert data to DMatrix
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dvalid = xgb.DMatrix(X_valid, label=y_valid)

    # Pruning callback
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, "validation-merror")

    # Train model
    bst = xgb.train(params, dtrain, evals=[(dvalid, 'validation')], callbacks=[pruning_callback])

    # Evaluate model
    preds = bst.predict(dvalid)
    auc = roc_auc_score(y_valid, preds)
    return auc





In [17]:
study = optuna.create_study(direction='maximize', pruner=optuna.pruners.MedianPruner())

# Run optimization
study.optimize(objective, n_trials=1000)

[I 2024-03-05 07:01:52,375] A new study created in memory with name: no-name-fc0d0cdf-8f4b-4669-a175-ce05faf8b1da


[0]	validation-rmse:2.03505


[W 2024-03-05 07:01:52,715] Trial 5 failed with parameters: {'grow_policy': 'depthwise', 'n_estimators': 206, 'learning_rate': 0.09199558206648867, 'gamma': 0.05659487030290863, 'subsample': 0.32781683691899044, 'colsample_bytree': 0.4310247186124726, 'max_depth': 4, 'min_child_weight': 3, 'reg_lambda': 3.7636385719737877e-07, 'reg_alpha': 3.8788100510748245} because of the following error: KeyError('validation-merror').
Traceback (most recent call last):
  File "c:\Users\Janith\anaconda3\Lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Janith\AppData\Local\Temp\ipykernel_13036\1277427834.py", line 41, in objective
    bst = xgb.train(params, dtrain, evals=[(dvalid, 'validation')], callbacks=[pruning_callback])
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Janith\anaconda3\Lib\site-packages\xgboost\core.py", line 

[0]	validation-rmse:1.95896


[W 2024-03-05 07:01:52,746] Trial 6 failed with parameters: {'grow_policy': 'lossguide', 'n_estimators': 373, 'learning_rate': 0.3222293137681159, 'gamma': 0.3087903333179828, 'subsample': 0.9602421126951031, 'colsample_bytree': 0.3001274201521635, 'max_depth': 3, 'min_child_weight': 1, 'reg_lambda': 3.634622865085053e-09, 'reg_alpha': 1.3715551781122241e-07} because of the following error: KeyError('validation-merror').
Traceback (most recent call last):
  File "c:\Users\Janith\anaconda3\Lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Janith\AppData\Local\Temp\ipykernel_13036\1277427834.py", line 41, in objective
    bst = xgb.train(params, dtrain, evals=[(dvalid, 'validation')], callbacks=[pruning_callback])
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Janith\anaconda3\Lib\site-packages\xgboost\core.py", line 

[0]	validation-rmse:1.84853


[W 2024-03-05 07:01:52,857] Trial 3 failed with parameters: {'grow_policy': 'lossguide', 'n_estimators': 753, 'learning_rate': 0.6873166229289224, 'gamma': 0.07774714370247535, 'subsample': 0.8070758055546496, 'colsample_bytree': 0.40439675233202727, 'max_depth': 6, 'min_child_weight': 4, 'reg_lambda': 0.03066678888594875, 'reg_alpha': 4.9784095150545195e-08} because of the following error: KeyError('validation-merror').
Traceback (most recent call last):
  File "c:\Users\Janith\anaconda3\Lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Janith\AppData\Local\Temp\ipykernel_13036\1277427834.py", line 41, in objective
    bst = xgb.train(params, dtrain, evals=[(dvalid, 'validation')], callbacks=[pruning_callback])
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Janith\anaconda3\Lib\site-packages\xgboost\core.py", line 

[0]	validation-rmse:1.99472


[W 2024-03-05 07:01:58,479] Trial 0 failed with parameters: {'grow_policy': 'lossguide', 'n_estimators': 737, 'learning_rate': 0.15500732055677105, 'gamma': 0.25236829131261745, 'subsample': 0.7867249055730776, 'colsample_bytree': 0.9600923072201759, 'max_depth': 11, 'min_child_weight': 4, 'reg_lambda': 0.0014976143759822072, 'reg_alpha': 8.170325152001774e-06} because of the following error: KeyError('validation-merror').
Traceback (most recent call last):
  File "c:\Users\Janith\anaconda3\Lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Janith\AppData\Local\Temp\ipykernel_13036\1277427834.py", line 41, in objective
    bst = xgb.train(params, dtrain, evals=[(dvalid, 'validation')], callbacks=[pruning_callback])
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Janith\anaconda3\Lib\site-packages\xgboost\core.py", lin

[0]	validation-rmse:2.06620


[W 2024-03-05 07:02:24,886] Trial 2 failed with parameters: {'grow_policy': 'lossguide', 'n_estimators': 677, 'learning_rate': 0.01607121273972752, 'gamma': 0.1285767039299686, 'subsample': 0.4814874566775613, 'colsample_bytree': 0.518841730789027, 'max_depth': 0, 'min_child_weight': 6, 'reg_lambda': 2.1215003879957627e-08, 'reg_alpha': 0.026202863765273633} because of the following error: KeyError('validation-merror').
Traceback (most recent call last):
  File "c:\Users\Janith\anaconda3\Lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Janith\AppData\Local\Temp\ipykernel_13036\1277427834.py", line 41, in objective
    bst = xgb.train(params, dtrain, evals=[(dvalid, 'validation')], callbacks=[pruning_callback])
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Janith\anaconda3\Lib\site-packages\xgboost\core.py", line 7

KeyboardInterrupt: 