In [2]:
from sklearn.model_selection import train_test_split
import optuna
from sklearn.metrics import f1_score
import catboost as cb
import json
import pandas as pd
from sklearn.preprocessing import RobustScaler

In [4]:
df = pd.read_csv("../dataset/processed/targetECTrain.csv")

X = df.drop(["damage_grade"], axis=1)
y = df["damage_grade"]

s = RobustScaler()

X_s = pd.DataFrame(s.fit_transform(X))
X_s.columns = X.columns
X = X_s

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=69420)

In [16]:
import warnings

warnings.filterwarnings("ignore")

In [18]:
def objective(trial):
    param = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 10000),
            "learning_rate": trial.suggest_loguniform("learning_rate", 1e-3, 1.0),
            "max_depth": trial.suggest_int("max_depth", 1, 16),
            "random_state": 69420,
        }

    gbm = cb.CatBoostClassifier(**param)

    gbm.fit(X_train, y_train, eval_set=[
            (X_test, y_test)], verbose=0, early_stopping_rounds=100)

    preds = gbm.predict(X_test)
    return f1_score(y_test, preds, average='macro')


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50, timeout=600)


[32m[I 2023-04-11 00:15:32,289][0m A new study created in memory with name: no-name-5ac07c74-793e-46dd-b2ac-c88cdcb19f78[0m
[32m[I 2023-04-11 00:15:33,581][0m Trial 0 finished with value: 0.6534933151655921 and parameters: {'n_estimators': 5832, 'learning_rate': 0.5146199629844437, 'max_depth': 4}. Best is trial 0 with value: 0.6534933151655921.[0m
