In [None]:
! pip install catboost
! pip install optuna

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import optuna
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier
from imblearn.under_sampling import RandomUnderSampler

# `warnings` was never imported, so the call below raised NameError on a fresh kernel.
import warnings

# Silence all library warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

In [14]:
# Load the training table from a CSV file in the working directory.
# NOTE(review): the file name suggests it was pre-processed elsewhere — confirm provenance.
train = pd.read_csv('train_processed.csv')

In [15]:
# Build the feature matrix X and the target vector y from the loaded frame.
#
# The five columns selected into `tr1` are standardised; every remaining column
# except the target 'Response' is passed through unchanged and appended after them.
#
# NOTE(review): `train` is overwritten at the end of this cell, so the cell cannot be
# re-run without re-running the load cell first.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test split.
tr1 = train[['Age', 'Annual_Premium', 'Policy_Sales_Channel', 'Vintage', 'Region_Code']]
tr2 = train.drop(columns=tr1.columns)

# `tr2` is a fresh frame returned by drop(), so popping the target does not touch `train`.
y = tr2.pop('Response').to_numpy()

scaler = StandardScaler()
X = np.hstack((scaler.fit_transform(tr1), tr2.to_numpy()))

# Release the references to the intermediate frames.
train = tr1 = tr2 = 0

In [23]:
# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Undersample the majority class in the training part only; the hold-out part
# keeps its original class distribution.
X_train, y_train = RandomUnderSampler(
    sampling_strategy='majority',
    random_state=1,
).fit_resample(X_train, y_train)

In [None]:
def objective_catboost(trial):
    """Optuna objective: train a CatBoost classifier and return its hold-out ROC AUC.

    Reads the notebook-level globals ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Args:
        trial: the ``optuna`` trial used to sample hyperparameters.

    Returns:
        ROC AUC on ``(X_test, y_test)``, computed from the predicted probability of
        the positive class (not from hard class labels, which would collapse the ROC
        curve to a single operating point).

    NOTE(review): an identically named function is defined again in a later cell and
    shadows this one when the notebook is run top to bottom.
    NOTE(review): the same hold-out set drives early stopping and the reported score,
    so the score is optimistically biased; consider a separate validation split.
    """
    # suggest_float(..., log=True) is the non-deprecated spelling of suggest_loguniform
    # and samples from the same log-uniform range under the same parameter name.
    params = {
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'depth': trial.suggest_int('depth', 4, 10),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1e-1, log=True),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-2, 10, log=True),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.01, 1.0, log=True),
        'random_strength': trial.suggest_float('random_strength', 1e-2, 10, log=True)
    }

    model = CatBoostClassifier(**params, verbose=0, random_state=42)
    model.fit(X_train, y_train, eval_set=(X_test, y_test), early_stopping_rounds=30, verbose=0)

    # Column 1 holds the probability of the positive (larger-label) class.
    y_score = model.predict_proba(X_test)[:, 1]
    return roc_auc_score(y_test, y_score)

def objective_xgboost(trial):
    """Optuna objective: train an XGBoost classifier and return its hold-out ROC AUC.

    Reads the notebook-level globals ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Args:
        trial: the ``optuna`` trial used to sample hyperparameters.

    Returns:
        ROC AUC on ``(X_test, y_test)``, computed from the predicted probability of
        the positive class (not from hard class labels, which would collapse the ROC
        curve to a single operating point).

    NOTE(review): an identically named function is defined again in a later cell and
    shadows this one when the notebook is run top to bottom.
    """
    # suggest_float(..., log=True) is the non-deprecated spelling of suggest_loguniform
    # and samples from the same log-uniform range under the same parameter name.
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 200, 500),
        'max_depth': trial.suggest_int('max_depth', 2, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 1e-8, 1.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True)
    }

    model = XGBClassifier(**params, use_label_encoder=False, eval_metric='logloss')
    model.fit(X_train, y_train)

    # Column 1 holds the probability of the positive (larger-label) class.
    y_score = model.predict_proba(X_test)[:, 1]
    return roc_auc_score(y_test, y_score)

In [None]:
def objective_catboost(trial):
    """Optuna objective: train a CatBoost classifier and return its hold-out ROC AUC.

    Reads the notebook-level globals ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Args:
        trial: the ``optuna`` trial used to sample hyperparameters.

    Returns:
        ROC AUC on ``(X_test, y_test)``, computed from the predicted probability of
        the positive class (not from hard class labels, which would collapse the ROC
        curve to a single operating point).

    NOTE(review): this cell redefines a function of the same name from an earlier
    cell; this later definition is the one in effect after a top-to-bottom run.
    NOTE(review): the same hold-out set drives early stopping and the reported score,
    so the score is optimistically biased; consider a separate validation split.
    """
    # suggest_float(..., log=True) is the non-deprecated spelling of suggest_loguniform
    # and samples from the same log-uniform range under the same parameter name.
    params = {
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'depth': trial.suggest_int('depth', 4, 10),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1e-1, log=True),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-2, 10, log=True),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.01, 1.0, log=True),
        'random_strength': trial.suggest_float('random_strength', 1e-2, 10, log=True)
    }

    model = CatBoostClassifier(**params, verbose=0, random_state=42)
    model.fit(X_train, y_train, eval_set=(X_test, y_test), early_stopping_rounds=30, verbose=0)

    # Column 1 holds the probability of the positive (larger-label) class.
    y_score = model.predict_proba(X_test)[:, 1]
    return roc_auc_score(y_test, y_score)

def objective_xgboost(trial):
    """Optuna objective: train an XGBoost classifier and return its hold-out ROC AUC.

    Reads the notebook-level globals ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Args:
        trial: the ``optuna`` trial used to sample hyperparameters.

    Returns:
        ROC AUC on ``(X_test, y_test)``, computed from the predicted probability of
        the positive class (not from hard class labels, which would collapse the ROC
        curve to a single operating point).

    NOTE(review): this cell redefines a function of the same name from an earlier
    cell; this later definition is the one in effect after a top-to-bottom run.
    """
    # suggest_float(..., log=True) is the non-deprecated spelling of suggest_loguniform
    # and samples from the same log-uniform range under the same parameter name.
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 200, 500),
        'max_depth': trial.suggest_int('max_depth', 2, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 1e-8, 1.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True)
    }

    model = XGBClassifier(**params, use_label_encoder=False, eval_metric='logloss')
    model.fit(X_train, y_train)

    # Column 1 holds the probability of the positive (larger-label) class.
    y_score = model.predict_proba(X_test)[:, 1]
    return roc_auc_score(y_test, y_score)

In [None]:
# Hyperparameter optimisation for CatBoost.
# The TPE sampler (Optuna's default algorithm) is seeded explicitly so the sequence of
# suggested hyperparameters does not change between notebook runs.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective_catboost, n_trials=100)

# Best hyperparameter set found, reused later to train the final model.
catboost_params = study.best_params

In [None]:
# Hyperparameter optimisation for XGBoost.
# The TPE sampler (Optuna's default algorithm) is seeded explicitly so the sequence of
# suggested hyperparameters does not change between notebook runs.
study2 = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study2.optimize(objective_xgboost, n_trials=100)

# Best hyperparameter set found, reused later to train the final model.
xgboost_params = study2.best_params

In [None]:
# Undersample the majority class on the full dataset before fitting the final models.
# X and y are rebound to the resampled data, so the original arrays are no longer
# reachable through these names after this cell has run.
X, y = RandomUnderSampler(sampling_strategy='majority', random_state=1).fit_resample(X, y)

In [None]:
# Train the final CatBoost model on X, y with the hyperparameters chosen by the study.
# NOTE(review): unlike the tuning objective, no eval_set / early stopping is passed here,
# so training is not cut short the way it could be during tuning.
catboost_model = CatBoostClassifier(**catboost_params, random_state=42)
catboost_model.fit(X, y, verbose=0)

In [None]:
# Train the final XGBoost model on X, y with the hyperparameters chosen by the study.
# The extra keyword arguments are the same ones passed in the tuning objective.
xgboost_model = XGBClassifier(**xgboost_params, use_label_encoder=False, eval_metric='logloss')
xgboost_model.fit(X, y)