In [1]:
import pandas as pd
import xgboost as xgb
import optuna

In [2]:
# Pre-processed feature tables (train includes the `damage_grade` target column).
train = pd.read_csv("Dataset/train_fe_fillna_lessrow_lessunique_le_ohe.csv")
test = pd.read_csv("Dataset/test_fe_fillna_lessrow_lessunique_le_ohe.csv")
# Only the `id` column of the raw test file is needed (for the submission),
# so avoid parsing every other column.
ids = pd.read_csv("Dataset/test.csv", usecols=["id"])["id"]

In [3]:
# Features are every column except the target. Dropping the target by name
# (rather than slicing off the last column) does not depend on column order.
X_train = train.drop(columns=["damage_grade"])
X_test = test
# Shift the labels down by one so they start at 0, as XGBoost's multiclass
# objectives expect. Build a new Series instead of using `-=` so that `train`
# is not modified and re-running this cell does not shift the labels again.
y = train["damage_grade"] - 1

In [4]:
def objective(trial):
    """Optuna objective: 5-fold cross-validated multiclass log loss of XGBoost.

    Samples one hyper-parameter configuration from ``trial``, runs ``xgb.cv``
    with early stopping on the module-level ``X_train`` / ``y``, and returns
    the mean test mlogloss of the last retained boosting round (lower is
    better).

    Side effect: stores the number of boosting rounds actually retained by
    early stopping in the trial's user attribute ``"best_num_boost_round"``,
    so the final model can be trained with the same number of trees that
    produced the reported score.
    """
    cv_params = {
        "early_stopping_rounds": 10,
        "nfold": 5,
        "metrics": 'mlogloss',  # multiclass log loss
        # Upper bound on boosting rounds; early stopping may end sooner.
        "num_boost_round": trial.suggest_int('num_boost_round', 500, 2000),
    }
    model_params = {
        "eval_metric": 'mlogloss',  # multiclass log loss
        "objective": "multi:softmax",  # multiclass classification
        "num_class": 5,  # number of distinct labels in y (0..4 after the shift)
        "eta": trial.suggest_float('eta', 0.01, 0.2),
        "max_depth": trial.suggest_int('max_depth', 2, 10),
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.3,0.4,0.5,0.6,0.7,0.8,0.9, 1.0]),
        'subsample': trial.suggest_categorical('subsample', [0.4,0.5,0.6,0.7,0.8,1.0]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        "n_jobs": -1,
        'tree_method': 'gpu_hist'
    }
    # Labels are integer class indices (not one-hot encoded).
    data_dmatrix = xgb.DMatrix(data=X_train, label=y)
    xgb_cv = xgb.cv(dtrain=data_dmatrix, params=model_params, **cv_params)
    # When early stopping triggers, xgb.cv truncates the history at the best
    # round, so its length is the number of rounds behind the returned score.
    trial.set_user_attr("best_num_boost_round", len(xgb_cv))
    return xgb_cv['test-mlogloss-mean'].iloc[-1]  # mlogloss to minimise


In [5]:
# Seed the sampler so the hyper-parameter search is reproducible across
# Restart & Run All. TPESampler is Optuna's default sampler, so only the
# seeding changes, not the search algorithm.
SEED = 42
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=50)

[I 2023-08-03 10:43:43,066] A new study created in memory with name: no-name-88f41b07-9e29-46b2-a29f-78deb177513e
[I 2023-08-03 10:44:05,029] Trial 0 finished with value: 0.7533323589119958 and parameters: {'num_boost_round': 1599, 'eta': 0.09537563938235687, 'max_depth': 6, 'lambda': 0.5051744111174215, 'alpha': 1.709516031375505, 'colsample_bytree': 0.5, 'subsample': 0.5, 'min_child_weight': 76}. Best is trial 0 with value: 0.7533323589119958.
[I 2023-08-03 10:44:25,338] Trial 1 finished with value: 0.7522538857274597 and parameters: {'num_boost_round': 1026, 'eta': 0.12054871585816153, 'max_depth': 4, 'lambda': 7.391452659356109, 'alpha': 9.104376666013527, 'colsample_bytree': 0.9, 'subsample': 0.8, 'min_child_weight': 140}. Best is trial 1 with value: 0.7522538857274597.
[I 2023-08-03 10:44:33,406] Trial 2 finished with value: 0.7528302425078952 and parameters: {'num_boost_round': 649, 'eta': 0.18930991883522205, 'max_depth': 7, 'lambda': 2.8376831302419516, 'alpha': 6.982738902900

In [6]:
# `num_boost_round` is an argument of xgb.cv / xgb.train, not a booster
# parameter, so take it out of the dict forwarded to XGBClassifier.
best_params = dict(study.best_params)
max_rounds = best_params.pop("num_boost_round")
# Prefer the round count retained by early stopping during CV (recorded by the
# objective as a user attribute, when available); it is the tree count that
# achieved the best score. Fall back to the sampled upper bound otherwise.
n_estimators = study.best_trial.user_attrs.get("best_num_boost_round", max_rounds)

model = xgb.XGBClassifier(
    **best_params,  # eta / lambda / alpha etc. are passed through as booster kwargs
    n_estimators=n_estimators,
    verbosity=0,
    tree_method="gpu_hist"
)
model.fit(X_train, y)

In [7]:
# Predict on the test features and undo the label shift applied before
# training (the model was fitted on labels reduced by one).
prediction = model.predict(X_test) + 1

# Assemble the two-column submission table and write it without the index.
submission = pd.DataFrame({'id': ids, 'damage_grade': prediction})
submission.to_csv(
    'Submissions/xgboost_optuna_fe_fillna_lessrow_lessunique_le_ohe.csv',
    index=False,
)
print('Successfully made a prediction!')

Successfully made a prediction!
