# CatBoost Evaluation

## 1. Generate Random Data

In [1]:
import sys
sys.path.append('../../src')

import pandas as pd
import numpy as np
import random
from random_data_generator import random_data_generator

# tuning
from tuning import find_best_model

#models
from xgboost import XGBClassifier
from catboost import CatBoostClassifier, Pool
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

from evaluator import MultiClassEvaluator


#metrics
from sklearn.metrics import roc_auc_score

# Seed Python's built-in `random` module so that code drawing from it is repeatable.
# NOTE(review): NumPy's global RNG is not seeded in this cell — confirm whether
# random_data_generator (or the tuning code) relies on it for reproducibility.
random.seed(42)

In [2]:
# Build the collection of datasets evaluated below.
# NOTE(review): presumably "multiclass" selects the task type and 5 is the number of
# generated entries — confirm against random_data_generator. The evaluation loop reads
# positions 0-5 of each entry as x_train, y_train, x_val, y_val, x_test, y_test.
dataset = random_data_generator("multiclass", 5)


## 3. CatBoost

In [4]:


# Per-entry ROC AUC results; every list receives one value per entry of `dataset`.
# NOTE(review): get_rocau_spec(0) looks like the overall score and 1-3 the per-class
# scores (judging by the printed evaluator output) — confirm against MultiClassEvaluator.
scores, scores_class1, scores_class2, scores_class3 = [], [], [], []

for i in range(len(dataset)):
    # Positions 0-5 of the entry are unpacked as the train / validation / test pairs.
    x_train, y_train, x_val, y_val, x_test, y_test = (dataset[i][k] for k in range(6))

    # Search for the best "multi_cat" configuration using the train and validation data.
    best_params = find_best_model("multi_cat", x_train, y_train, x_val, y_val, trials=60)

    # Fit a CatBoost classifier with the selected parameters and a fixed seed,
    # passing the validation pair as the evaluation set.
    cat_clf = CatBoostClassifier(**best_params.params, random_seed=42).fit(
        x_train,
        y_train,
        eval_set=[(x_val, y_val)],
        verbose=False,
    )

    # Evaluate on the held-out test pair, then record the scores for specs 0..3 in order.
    evaluator = MultiClassEvaluator(cat_clf)
    evaluator.evaluate(x_test, y_test)
    for spec, bucket in enumerate((scores, scores_class1, scores_class2, scores_class3)):
        bucket.append(evaluator.get_rocau_spec(spec))



[I 2024-03-18 09:46:06,123] A new study created in memory with name: no-name-519f0a79-0c36-4d72-9a09-834070aff991


  0%|          | 0/60 [00:00<?, ?it/s]

[I 2024-03-18 09:46:15,220] Trial 0 finished with value: 0.7263775475712251 and parameters: {'learning_rate': 0.06736526292566136, 'max_depth': 5, 'n_estimators': 99, 'l2_leaf_reg': 0.3050235007102542}. Best is trial 0 with value: 0.7263775475712251.
[I 2024-03-18 09:46:30,909] Trial 1 finished with value: 0.7291081354629424 and parameters: {'learning_rate': 0.00778042007033149, 'max_depth': 7, 'n_estimators': 111, 'l2_leaf_reg': 0.7712212447155757}. Best is trial 1 with value: 0.7291081354629424.
[I 2024-03-18 09:46:49,393] Trial 2 finished with value: 0.7486283669506874 and parameters: {'learning_rate': 0.06128707913076782, 'max_depth': 7, 'n_estimators': 145, 'l2_leaf_reg': 0.0033930634079131217}. Best is trial 2 with value: 0.7486283669506874.
[I 2024-03-18 09:46:55,099] Trial 3 finished with value: 0.7126552775695253 and parameters: {'learning_rate': 0.011647473982859636, 'max_depth': 3, 'n_estimators': 76, 'l2_leaf_reg': 0.41440841624483127}. Best is trial 2 with value: 0.7486283

[I 2024-03-18 10:11:17,444] A new study created in memory with name: no-name-6ad1729d-5a7d-4a09-9b56-b5aa804ef850


F1 Score: 0.23595112214017308
ROC AUC Score: 0.7940611379934558
ROC AUC Score Class 1: 0.7301600503080703
ROC AUC Score Class 2: 0.7610523822691728
ROC AUC Score Class 3: 0.9020017226114984


  0%|          | 0/60 [00:00<?, ?it/s]

[I 2024-03-18 10:11:49,504] Trial 0 finished with value: 0.6220357472517479 and parameters: {'learning_rate': 0.04940267634634648, 'max_depth': 7, 'n_estimators': 149, 'l2_leaf_reg': 0.6528275523694127}. Best is trial 0 with value: 0.6220357472517479.
[I 2024-03-18 10:13:08,458] Trial 1 finished with value: 0.6077299378871333 and parameters: {'learning_rate': 0.0981865423927658, 'max_depth': 8, 'n_estimators': 143, 'l2_leaf_reg': 0.3099485622260185}. Best is trial 0 with value: 0.6220357472517479.
[I 2024-03-18 10:13:22,077] Trial 2 finished with value: 0.6234928908375328 and parameters: {'learning_rate': 0.0970007867504525, 'max_depth': 5, 'n_estimators': 89, 'l2_leaf_reg': 0.3868308943842642}. Best is trial 2 with value: 0.6234928908375328.
[I 2024-03-18 10:13:39,860] Trial 3 finished with value: 0.6214168812226366 and parameters: {'learning_rate': 0.08836439968239265, 'max_depth': 5, 'n_estimators': 122, 'l2_leaf_reg': 0.40564544049661366}. Best is trial 2 with value: 0.623492890837

[I 2024-03-18 10:30:39,685] A new study created in memory with name: no-name-2e426871-d976-44e5-b9d7-8ed59fa3671e


F1 Score: 0.2875538714718434
ROC AUC Score: 0.7750099913005112
ROC AUC Score Class 1: 0.8449315036619504
ROC AUC Score Class 2: 0.6963651378421359
ROC AUC Score Class 3: 0.7797185338577527


  0%|          | 0/60 [00:00<?, ?it/s]

[I 2024-03-18 10:30:48,264] Trial 0 finished with value: 0.7435479609756339 and parameters: {'learning_rate': 0.009101258951210164, 'max_depth': 2, 'n_estimators': 133, 'l2_leaf_reg': 0.13914579007261219}. Best is trial 0 with value: 0.7435479609756339.
[I 2024-03-18 10:31:00,211] Trial 1 finished with value: 0.7978617030972749 and parameters: {'learning_rate': 0.07894880631357774, 'max_depth': 6, 'n_estimators': 100, 'l2_leaf_reg': 0.4636796109863649}. Best is trial 1 with value: 0.7978617030972749.
[I 2024-03-18 10:31:06,225] Trial 2 finished with value: 0.7836291005197258 and parameters: {'learning_rate': 0.08382091024783571, 'max_depth': 2, 'n_estimators': 94, 'l2_leaf_reg': 0.019444799179716088}. Best is trial 1 with value: 0.7978617030972749.
[I 2024-03-18 10:31:32,510] Trial 3 finished with value: 0.7816508939567595 and parameters: {'learning_rate': 0.04879138340611103, 'max_depth': 8, 'n_estimators': 76, 'l2_leaf_reg': 0.1631382430175784}. Best is trial 1 with value: 0.79786170

[I 2024-03-18 10:50:44,153] A new study created in memory with name: no-name-3f54a7e4-a2b4-4722-b7f2-2861a5fee8e4


F1 Score: 0.3194480850419098
ROC AUC Score: 0.7504748350162156
ROC AUC Score Class 1: 0.7958231821532387
ROC AUC Score Class 2: 0.6438097427000152
ROC AUC Score Class 3: 0.854244006016091


  0%|          | 0/60 [00:00<?, ?it/s]

[I 2024-03-18 10:51:01,837] Trial 0 finished with value: 0.6936386642764787 and parameters: {'learning_rate': 0.009375252406526112, 'max_depth': 6, 'n_estimators': 101, 'l2_leaf_reg': 0.2355440491424261}. Best is trial 0 with value: 0.6936386642764787.
[I 2024-03-18 10:51:26,075] Trial 1 finished with value: 0.696752329034403 and parameters: {'learning_rate': 0.02504862432452608, 'max_depth': 5, 'n_estimators': 78, 'l2_leaf_reg': 0.31624483661054104}. Best is trial 1 with value: 0.696752329034403.
[I 2024-03-18 10:51:43,826] Trial 2 finished with value: 0.7012341024692641 and parameters: {'learning_rate': 0.09382194930764656, 'max_depth': 3, 'n_estimators': 86, 'l2_leaf_reg': 0.830634771672212}. Best is trial 2 with value: 0.7012341024692641.
[I 2024-03-18 10:52:23,934] Trial 3 finished with value: 0.7109885315966388 and parameters: {'learning_rate': 0.048646627936931745, 'max_depth': 5, 'n_estimators': 136, 'l2_leaf_reg': 0.4902825265824684}. Best is trial 3 with value: 0.710988531596

[I 2024-03-18 11:39:52,982] A new study created in memory with name: no-name-9769150e-3d0c-41b6-a560-15923babca16


F1 Score: 0.23887378305244725
ROC AUC Score: 0.7803600184517747
ROC AUC Score Class 1: 0.6925050780703264
ROC AUC Score Class 2: 0.7261899695478022
ROC AUC Score Class 3: 0.9336287792261829


  0%|          | 0/60 [00:00<?, ?it/s]

[I 2024-03-18 11:40:09,650] Trial 0 finished with value: 0.7809552331566145 and parameters: {'learning_rate': 0.046571252019130885, 'max_depth': 2, 'n_estimators': 82, 'l2_leaf_reg': 0.8230381454404972}. Best is trial 0 with value: 0.7809552331566145.
[I 2024-03-18 11:40:46,163] Trial 1 finished with value: 0.8062949577078095 and parameters: {'learning_rate': 0.09113936211599168, 'max_depth': 5, 'n_estimators': 121, 'l2_leaf_reg': 0.41973104344726964}. Best is trial 1 with value: 0.8062949577078095.
[I 2024-03-18 11:41:05,270] Trial 2 finished with value: 0.7879001138396227 and parameters: {'learning_rate': 0.04604233473141312, 'max_depth': 2, 'n_estimators': 102, 'l2_leaf_reg': 0.5184766573481012}. Best is trial 1 with value: 0.8062949577078095.
[I 2024-03-18 11:41:31,600] Trial 3 finished with value: 0.7874854298867698 and parameters: {'learning_rate': 0.01792097087575961, 'max_depth': 6, 'n_estimators': 62, 'l2_leaf_reg': 0.33951932253884}. Best is trial 1 with value: 0.806294957707

In [6]:
# Average each collected score list over all evaluated dataset entries.
mean, mean_class1, mean_class2, mean_class3 = (
    np.array(values).mean()
    for values in (scores, scores_class1, scores_class2, scores_class3)
)

# Report the four averages, one per line.
print(
    f"Mean Scores: {mean}",
    f"Mean Scores Class 1: {mean_class1}",
    f"Mean Scores Class 2: {mean_class2}",
    f"Mean Scores Class 3: {mean_class3}",
    sep="\n",
)

Mean Scores: 0.7670997843665426
Mean Scores Class 1: 0.7643689900936437
Mean Scores Class 2: 0.7068202038955613
Mean Scores Class 3: 0.8510427909846717
