In [2]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.metrics import f1_score, classification_report, log_loss
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from catboost import CatBoostClassifier


In [3]:
# Load the Sonar CSV and split it into the feature frame and the 'Class' label column.
df = pd.read_csv('../Datasets/cases/Sonar/Sonar.csv')
X = df.drop(columns='Class')
le = LabelEncoder()
y = le.fit_transform(df['Class'])  # labels encoded as integers

# Stratified hold-out split: 30% of the rows go to the test part, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=25, stratify=y
)

# Hyper-parameter grid: 20 learning rates x 5 tree counts x 6 depth settings.
# A depth of None is handed to CatBoost unchanged as max_depth=None.
learning_rates = np.linspace(0.01, 0.8, 20)
tree_counts = np.arange(50, 200, 30)
depth_options = [None, 2, 3, 4, 5, 7]
result_columns = ['learning rate', 'estimator', 'max depth', 'log loss', 'f1']
result_rows = []

# NOTE(review): every configuration is scored on (X_test, y_test) and the table
# below is ranked by those same scores - confirm this is the intended protocol.
for lr in tqdm(learning_rates):
    for n_trees in tree_counts:
        for depth in depth_options:
            model = CatBoostClassifier(
                learning_rate=lr,
                n_estimators=n_trees,
                max_depth=depth,
                random_state=23,
                verbose=0,
            )
            model.fit(X_train, y_train)
            test_proba = model.predict_proba(X_test)
            test_labels = model.predict(X_test)
            test_log_loss = log_loss(y_test, test_proba)
            test_f1 = f1_score(y_test, test_labels)
            result_rows.append([lr, n_trees, depth, test_log_loss, test_f1])

# One row per evaluated configuration; displayed with the lowest log loss first.
scores = pd.DataFrame(result_rows, columns=result_columns)
scores.sort_values(by='log loss')

100%|██████████| 20/20 [04:50<00:00, 14.51s/it]


Unnamed: 0,learning rate,estimator,max depth,log loss,f1
400,0.550526,80,5.0,0.284434,0.739130
394,0.550526,50,5.0,0.284942,0.739130
244,0.342632,50,5.0,0.286261,0.867925
250,0.342632,80,5.0,0.289589,0.867925
364,0.508947,50,5.0,0.290428,0.840000
...,...,...,...,...,...
506,0.675263,170,3.0,0.689932,0.693878
489,0.675263,80,4.0,0.693096,0.625000
495,0.675263,110,4.0,0.701015,0.638298
507,0.675263,170,4.0,0.712377,0.638298


In [4]:
# Show the evaluated configurations ordered from highest to lowest F1 score.
scores.sort_values('f1', ascending=False)

Unnamed: 0,learning rate,estimator,max depth,log loss,f1
508,0.675263,170,5.0,0.379464,0.888889
502,0.675263,140,5.0,0.380680,0.872727
496,0.675263,110,5.0,0.381847,0.872727
268,0.342632,170,5.0,0.298553,0.867925
244,0.342632,50,5.0,0.286261,0.867925
...,...,...,...,...,...
13,0.010000,110,2.0,0.569074,0.590909
2,0.010000,50,3.0,0.600681,0.577778
3,0.010000,50,4.0,0.586398,0.577778
1,0.010000,50,2.0,0.612410,0.565217


In [5]:

# Grid sweep over learning rate, number of trees and tree depth with
# CatBoost seeded at random_state=25. Uses the X_train / X_test / y_train /
# y_test variables already present in the kernel.
rates = np.linspace(0.01, 0.8, 20)
n_est = np.arange(50, 200, 30)
depths = [None, 2, 3, 4, 5, 7]
score_columns = ['learning rate', 'estimator', 'max depth', 'log loss', 'f1']
score_rows = []

for rate in tqdm(rates):
    for n_trees in n_est:
        for depth in depths:
            # Despite its name, `lgbm` holds a CatBoostClassifier.
            lgbm = CatBoostClassifier(
                learning_rate=rate,
                n_estimators=n_trees,
                max_depth=depth,
                random_state=25,
                verbose=0,
            )
            lgbm.fit(X_train, y_train)
            y_pred_proba = lgbm.predict_proba(X_test)
            y_pred = lgbm.predict(X_test)
            holdout_log_loss = log_loss(y_test, y_pred_proba)
            holdout_f1 = f1_score(y_test, y_pred)
            score_rows.append([rate, n_trees, depth, holdout_log_loss, holdout_f1])

# One row per evaluated configuration; displayed with the lowest log loss first.
scores = pd.DataFrame(score_rows, columns=score_columns)
scores.sort_values(by='log loss')

100%|██████████| 20/20 [04:46<00:00, 14.31s/it]


Unnamed: 0,learning rate,estimator,max depth,log loss,f1
197,0.259474,110,7.0,0.375601,0.765957
185,0.259474,50,7.0,0.375816,0.750000
203,0.259474,140,7.0,0.375985,0.765957
191,0.259474,80,7.0,0.376326,0.765957
209,0.259474,170,7.0,0.377004,0.791667
...,...,...,...,...,...
441,0.592105,140,4.0,0.881386,0.666667
435,0.592105,110,4.0,0.883927,0.666667
589,0.800000,140,2.0,0.883930,0.625000
595,0.800000,170,2.0,0.884959,0.638298


In [6]:
# Show the evaluated configurations ordered from highest to lowest F1 score.
scores.sort_values('f1', ascending=False)

Unnamed: 0,learning rate,estimator,max depth,log loss,f1
323,0.425789,140,7.0,0.460141,0.840000
336,0.467368,80,,0.442966,0.840000
313,0.425789,110,2.0,0.448309,0.830189
319,0.425789,140,2.0,0.448580,0.830189
553,0.758421,110,2.0,0.505387,0.830189
...,...,...,...,...,...
417,0.550526,170,4.0,0.847467,0.577778
399,0.550526,80,4.0,0.826361,0.565217
393,0.550526,50,4.0,0.831622,0.565217
411,0.550526,140,4.0,0.839559,0.565217
