## Initial Settings

In [1]:
import os
import numpy as np
import pandas as pd
import warnings

from google.colab import drive

# Silence library warnings so the metric tables below stay readable.
# NOTE: this hides *all* warnings (including deprecation notices); narrow or
# remove the filter when debugging unexpected model behaviour.
warnings.filterwarnings('ignore')

# Mount Google Drive; the project folder lives under MyDrive.
drive.mount("/content/drive")

# Absolute path (rooted at the mount point above) instead of a path relative
# to the current directory: a relative chdir only works the first time the
# cell runs, because afterwards the working directory is already inside the
# project folder and the relative target no longer exists.
PROJECT_DIR = "/content/drive/MyDrive/competition/2022-AI-competition-Round1"
os.chdir(PROJECT_DIR)

# Display the project contents as a quick sanity check that the mount worked.
os.listdir()

Mounted at /content/drive


['competition_data',
 'html',
 'submission',
 'playground.ipynb',
 'AutoML Baseline.ipynb',
 'Evaluator Module.ipynb',
 '2022-08-04 feature EDA.ipynb',
 "2022-08-05 LGB_train(hyeonbin's parameter tuning).ipynb",
 'model compare.ipynb',
 'Optuna Optimization.ipynb',
 '2022-08-06 Optimize ET.ipynb',
 '2022-08-06 Regressor model compare.ipynb',
 'catboost_info',
 'evaluator',
 '.git',
 '.gitignore',
 'GitHub Connection.ipynb',
 'README.md',
 'Models(XGB_ET) Optimization.ipynb']

In [None]:
!pip install -r evaluator/requirements.txt

In [4]:
from evaluator.evaluator import Evaluator, Model

# Load the three competition files in one pass: the training set, the test
# set, and the sample submission that is later handed to make_submission().
train_df, test_df, submission_df = [
    pd.read_csv(csv_path)
    for csv_path in (
        "competition_data/train.csv",
        "competition_data/test.csv",
        "competition_data/sample_submission.csv",
    )
]

### XGB, ET Baseline

In [5]:
# Baseline for the "xgb" model in "rgr" mode: no hyper-parameters are passed,
# so the Model wrapper's own defaults are evaluated. run() renders the
# per-fold metric table shown below.
xgb_baseline_spec = Model(train_df, "xgb", "rgr").get_model()
evaluator = Evaluator(**xgb_baseline_spec)
evaluator.run()



Unnamed: 0_level_0,accuracy,precision,recall,f1-score,roc_auc,mae,train_acc
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0.731791,0.736153,0.814031,0.773136,0.80054,0.365784,
2,0.731791,0.724733,0.818703,0.768858,0.808989,0.363743,
3,0.734605,0.737245,0.812254,0.772934,0.804817,0.366591,
4,0.740857,0.762397,0.8,0.780746,0.800434,0.365568,
mean,0.734761,0.740132,0.811247,0.773918,0.803695,0.365422,0.759925


In [6]:
# Baseline for the "et" model in "rgr" mode, again with the Model wrapper's
# defaults, evaluated the same way as the XGB baseline for comparison.
et_baseline_spec = Model(train_df, "et", "rgr").get_model()
evaluator = Evaluator(**et_baseline_spec)
evaluator.run()

Unnamed: 0_level_0,accuracy,precision,recall,f1-score,roc_auc,mae,train_acc
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0.777743,0.787798,0.826837,0.806846,0.868615,0.292263,
2,0.778368,0.779459,0.827309,0.802672,0.87313,0.295005,
3,0.772429,0.775853,0.830804,0.802389,0.866963,0.29954,
4,0.777743,0.804185,0.812466,0.808304,0.866602,0.294736,
mean,0.776571,0.786824,0.824354,0.805053,0.868828,0.295386,1.0


### Optuna Tuning

In [None]:
# Hyper-parameter search for the "xgb" model through the project's
# Model.optimize wrapper (100 trials).
#
# Each entry is (parameter name, kind, value-or-range). Kinds used here:
#   "static" - a fixed value; these names do not appear among the sampled
#              parameters in the trial log below
#   "int" / "float" / "log" - a (low, high) range to sample from
# NOTE(review): how each kind maps onto Optuna's suggest_* calls is defined in
# the evaluator package, not in this notebook - confirm there.
model = Model(train_df, "xgb", "rgr")
initial_params = (
    ("n_estimators", "int", (100, 1000)),
    ("objective", "static", "reg:squarederror"),
    # NOTE(review): "average" looks like a metric-averaging option consumed by
    # the wrapper rather than an XGBoost parameter - verify.
    ("average", "static", "micro"),
    ("learning_rate", "log", (1e-5, 1.0)),
    ("gamma", "int", (0, 5)),
    ("max_depth", "int", (5, 20)),
    ("lambda", "int", (0, 5)),
    ("alpha", "int", (0, 5)),
    ("subsample", "float", (0.7, 1))
)
# Long-running: the first logged trials below took roughly 2-7 minutes each.
model.optimize(initial_params, n_trials=100)

[I 2022-08-11 13:56:24,194] A new study created in memory with name: no-name-12958a95-d781-4478-b080-2ec8bb89fbfa
[I 2022-08-11 14:03:24,398] Trial 0 finished with value: 0.7761455689228761 and parameters: {'n_estimators': 453, 'learning_rate': 0.0007856730140310485, 'gamma': 5, 'max_depth': 17, 'lambda': 1, 'alpha': 3, 'subsample': 0.7355154569866553}. Best is trial 0 with value: 0.7761455689228761.
[I 2022-08-11 14:05:26,742] Trial 1 finished with value: 0.79529413789646 and parameters: {'n_estimators': 205, 'learning_rate': 0.2958789835767843, 'gamma': 4, 'max_depth': 12, 'lambda': 1, 'alpha': 3, 'subsample': 0.8226586911409234}. Best is trial 1 with value: 0.79529413789646.
[I 2022-08-11 14:10:34,707] Trial 2 finished with value: 0.7708143519391617 and parameters: {'n_estimators': 849, 'learning_rate': 1.5963827767652806e-05, 'gamma': 1, 'max_depth': 9, 'lambda': 2, 'alpha': 3, 'subsample': 0.994672164308138}. Best is trial 1 with value: 0.79529413789646.

In [None]:
# Evaluate whatever configuration `model` holds after optimize() has run.
# NOTE(review): presumably get_model() now returns the best trial's
# parameters - confirm in the evaluator package.
optimized_spec = model.get_model()
evaluator = Evaluator(**optimized_spec)
evaluator.run()

In [25]:
# Evaluate XGB with a hand-entered parameter set.
# NOTE(review): these values are presumably copied from an Optuna trial; the
# visible log is truncated, so the source trial cannot be confirmed here.

# Fixed settings (the same static entries used in the search space above).
static_params = {
    "objective": "reg:squarederror",
    "average": "micro",
}

# Values for the parameters that were searched.
tuned_params = {
    "n_estimators": 746,
    "learning_rate": 0.0181863245309935,
    "gamma": 0,
    "max_depth": 11,
    "lambda": 1,
    "alpha": 4,
    "subsample": 0.7231465475519483,
}

# Merge with static keys first, matching the original key order.
params = {**static_params, **tuned_params}

tuned_xgb_spec = Model(train_df, "xgb", "rgr", **params).get_model()
evaluator = Evaluator(**tuned_xgb_spec)

# The recorded result below reports train_acc = 1.0 against ~0.78 validation
# accuracy, i.e. this configuration fits the training folds perfectly.
evaluator.run()

Unnamed: 0_level_0,accuracy,precision,recall,f1-score,roc_auc,mae,train_acc
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0.784308,0.795091,0.829621,0.811989,0.870335,0.288269,
2,0.788059,0.783395,0.844521,0.812811,0.875054,0.289601,
3,0.778368,0.782173,0.833614,0.807075,0.867747,0.29597,
4,0.784308,0.80315,0.829268,0.816,0.866036,0.291083,
mean,0.783761,0.790952,0.834256,0.811969,0.869793,0.291231,1.0


In [26]:
# Build the submission frame from the most recently created `evaluator` and
# write it to the submission/ folder.
# NOTE(review): the recorded run of this cell ended with "ValueError" (see the
# output below), so this CSV may never have been written - reproduce and fix
# before relying on the file.
# NOTE(review): `submission_df` is rebound here, replacing the sample
# submission template loaded earlier; re-running the cell feeds the previous
# result back into make_submission instead of the original template.
submission_df = evaluator.make_submission(test_df, submission_df)
submission_df.to_csv("submission/2022-08-11_XGB_optim_20.csv", index=False)

ValueError: ignored

In [None]:
# optuna is imported here rather than in the first cell.
# NOTE(review): presumably because it is only available after the
# requirements install step - confirm before hoisting the import.
import optuna

# Which searched hyper-parameters influenced the objective the most.
# `model.optimizer.opt` is the object handed to Optuna's plotting helpers.
study = model.optimizer.opt
optuna.visualization.plot_param_importances(study)

In [None]:
# Objective value per trial, to see how the search progressed.
study = model.optimizer.opt
optuna.visualization.plot_optimization_history(study)

In [None]:
# Objective value against each individual hyper-parameter.
study = model.optimizer.opt
optuna.visualization.plot_slice(study)

In [None]:
# Hyper-parameter search for the "lgbm" model (500 trials), using the same
# (name, kind, value-or-range) entry format as the XGB search.
# This rebinds `model`: if the Optuna plotting cells above are run after this
# cell, they will show this study instead of the XGB one.
model = Model(train_df, "lgbm", "rgr")
initial_params = (
    ("n_estimators", "int", (500, 5000)),
    # NOTE(review): a "binary" objective is combined with the "rgr" model
    # mode - confirm this pairing is intentional.
    ("objective", "static", "binary"),
    ("learning_rate", "log", (0.01, 1.0)),
    # NOTE(review): the names below are carried over from the XGB search.
    # Unless the Model wrapper translates them, LightGBM's parameter list has
    # no `gamma` (its split-gain threshold is `min_split_gain`), and documents
    # `alpha` for the huber/quantile objectives rather than as L1
    # regularisation (`reg_alpha` / `lambda_l1`). Verify these are not wasted
    # search dimensions.
    ("gamma", "int", (0, 5)),
    ("max_depth", "int", (10, 20)),
    ("lambda", "int", (1, 4)),
    ("alpha", "int", (1, 5)),
    # NOTE(review): in LightGBM, `subsample` only takes effect when
    # `subsample_freq` (bagging_freq) is > 0 - check whether the wrapper sets it.
    ("subsample", "float", (0.8, 1))
)
# Long-running: 500 trials with up to 5000 estimators each.
model.optimize(initial_params, n_trials=500)