In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier 
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from catboost import CatBoostClassifier
import optuna
from sklearn.model_selection import RandomizedSearchCV
from random import randint

In [2]:
train = pd.read_csv("train_folds.csv")
test = pd.read_csv("cleaned_test.csv")
sample = pd.read_csv("SampleSubmission.csv")

useful_features = [i for i in train.columns if i not in("stra_kfold", "Response")]

In [3]:
def run(trial):
            fold=0
            learning_rate = trial.suggest_float("learning_rate", 1e-2, 0.25, log=True)
            max_depth = trial.suggest_int("max_depth", 1,7)
            n_estimators = trial.suggest_int("n_estimators", 100,2000)

            x_train = train[train.stra_kfold != fold].reset_index(drop=True)
            x_valid = train[train.stra_kfold == fold].reset_index(drop=True)

            y_train = x_train.Response
            y_valid = x_valid.Response

            x_train = x_train[useful_features]
            x_valid = x_valid[useful_features]

            sc = StandardScaler()
            scaled_x_train = pd.DataFrame(sc.fit_transform(x_train))
            scaled_x_valid = pd.DataFrame(sc.transform(x_valid))
            #scaled_test = pd.DataFrame(sc.transform(df_test))

            scaled_x_train.columns = x_train.columns
            scaled_x_valid.columns = x_valid.columns
            #scaled_test.columns = df_test.columns


            model = CatBoostClassifier(random_state=42,
                scale_pos_weight = 5.56,
                verbose = 0,
                n_estimators= n_estimators,
                learning_rate = learning_rate,
                max_depth = max_depth,
             )
            model.fit(scaled_x_train, y_train)
            preds = model.predict(scaled_x_valid)
            score = model.score(scaled_x_valid, y_valid)
            #score = model.score(scaled_x_train, y_train)
        
            return(score)

In [4]:
study = optuna.create_study(direction="maximize")
study.optimize(run,n_trials=5)

[32m[I 2021-10-09 09:11:06,328][0m A new study created in memory with name: no-name-9b4ab11f-160d-43d3-af18-5f56ac38c516[0m
[32m[I 2021-10-09 09:11:12,617][0m Trial 0 finished with value: 0.8775510204081632 and parameters: {'learning_rate': 0.1004261502680816, 'max_depth': 7, 'n_estimators': 401}. Best is trial 0 with value: 0.8775510204081632.[0m
[32m[I 2021-10-09 09:11:16,383][0m Trial 1 finished with value: 0.8418367346938775 and parameters: {'learning_rate': 0.02177806857872265, 'max_depth': 3, 'n_estimators': 815}. Best is trial 0 with value: 0.8775510204081632.[0m
[32m[I 2021-10-09 09:11:20,062][0m Trial 2 finished with value: 0.8724489795918368 and parameters: {'learning_rate': 0.029359395889084264, 'max_depth': 7, 'n_estimators': 478}. Best is trial 0 with value: 0.8775510204081632.[0m
[32m[I 2021-10-09 09:11:28,420][0m Trial 3 finished with value: 0.8724489795918368 and parameters: {'learning_rate': 0.0110632638051735, 'max_depth': 5, 'n_estimators': 1975}. Best 

In [5]:
study.best_params

{'learning_rate': 0.1004261502680816, 'max_depth': 7, 'n_estimators': 401}