In [1]:
!pip install catboost
!pip install optuna



In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
import catboost
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier
from sklearn.metrics import f1_score,classification_report,accuracy_score
import optuna
from optuna.samplers import TPESampler
%matplotlib inline

In [3]:
df=pd.read_csv("/content/data_sample.csv")
df.shape

(3079, 294)

In [4]:
df['target'] = np.where(df['ResponseRate'] == 0, 'B1',
                        np.where((df['ResponseRate'] > 0) & (df['ResponseRate'] < 0.15), 'B2',
                                 np.where(df['ResponseRate'] >= 0.15, 'B3', 'Not Specified')))

In [5]:
df['target'].value_counts()

B1    1635
B3     855
B2     589
Name: target, dtype: int64

In [6]:
# List the columns to remove
columns_to_remove = ['OfferHistoryID', 'ResponseRate']
# Drop the specified columns from the DataFrame
df.drop(columns=columns_to_remove, inplace=True)

In [7]:
df.shape

(3079, 293)

In [8]:
# Splitting the dataset into X and y
X = df.drop('target', axis=1)  # Features: all columns except 'target'
y = df['target']

In [9]:
# Splitting dataset into training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

**HYPERPARAMETER_TUNING**

In [11]:
def objective(trial):
    params = {
        "iterations":500,
        #"logging_level":'silent',
        #"verbose":0,
        "learning_rate": 0.001,
        "max_depth": trial.suggest_int("max_depth", 3,10),
        # "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20,100),
        "random_state": 42,
        # "subsample": trial.suggest_uniform('subsample', 0.5, 1.0),
        "colsample_bylevel": trial.suggest_float('colsample_bylevel', 0.4, 1.0),
        "random_strength": trial.suggest_float("random_strength", 1, 10.0),
        "bagging_temperature": trial.suggest_float("bagging_temperature", 0.5, 10.0),
    }

    model = catboost.CatBoostClassifier(**params, silent=True)
    model.fit(X_train, y_train)
    y_pred_test = model.predict(X_test)
    f1 = f1_score(y_test, y_pred_test,average='weighted')
    return f1

sampler = TPESampler(seed=42)
study = optuna.create_study(direction='maximize',sampler=sampler)
study.optimize(objective, n_trials=200)

print('Best hyperparameters:', study.best_params)
print('Best F1:', study.best_value)

[I 2024-04-02 11:13:40,959] A new study created in memory with name: no-name-38544773-c993-45bd-8562-0139b7302909
[I 2024-04-02 11:13:45,069] Trial 0 finished with value: 0.9321456845600832 and parameters: {'max_depth': 5, 'colsample_bylevel': 0.9704285838459497, 'random_strength': 7.587945476302646, 'bagging_temperature': 6.187255599871848}. Best is trial 0 with value: 0.9321456845600832.
[I 2024-04-02 11:13:47,208] Trial 1 finished with value: 0.9321456845600832 and parameters: {'max_depth': 4, 'colsample_bylevel': 0.49359671220172163, 'random_strength': 1.5227525095137953, 'bagging_temperature': 8.728673384861883}. Best is trial 0 with value: 0.9321456845600832.
[I 2024-04-02 11:13:52,873] Trial 2 finished with value: 0.9321456845600832 and parameters: {'max_depth': 7, 'colsample_bylevel': 0.8248435466776274, 'random_strength': 1.185260448662222, 'bagging_temperature': 9.714143595538946}. Best is trial 0 with value: 0.9321456845600832.
[I 2024-04-02 11:14:02,871] Trial 3 finished wi

Best hyperparameters: {'max_depth': 9, 'colsample_bylevel': 0.9041020801524965, 'random_strength': 1.3325737915591278, 'bagging_temperature': 5.349254817690343}
Best F1: 0.9336477623111783


In [None]:
#[I 2024-01-24 13:19:08,146] Trial 23 finished with value: 0.7698620649790703 and parameters: {'max_depth': 10, 'min_data_in_leaf': 85, 'colsample_bylevel': 0.7016150314426027, 'random_strength': 8.542169542677488, 'bagging_temperature': 8.681806510285387}. Best is trial 23 with value: 0.7698620649790703.

In [None]:
# Best hyperparameters: {'max_depth': 5, 'colsample_bylevel': 0.8574269294896714, 'random_strength': 4.887505167779041, 'bagging_temperature': 3.2666768318813983}
# Best F1: 0.9351504102964708