In [9]:
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.metrics import f1_score, classification_report, log_loss
from sklearn.model_selection import train_test_split
from tqdm import tqdm


In [10]:
# Load the Sonar data; 'Class' is the target column, the remaining columns are the features.
df = pd.read_csv('../Datasets/cases/Sonar/Sonar.csv')
X = df.drop(columns='Class')

# Encode the class labels as integers. The fitted encoder stays available as `le`
# so the original labels can be recovered with le.inverse_transform.
le = LabelEncoder()
le.fit(df['Class'])
y = le.transform(df['Class'])

# 70/30 hold-out split, stratified on the encoded target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=25,
)


In [11]:
# Exhaustive grid search over learning rate, number of boosting stages and tree depth
# for a gradient-boosting classifier. Each combination is fitted on the training split
# and scored by log loss on the hold-out split (lower is better).
# NOTE(review): the hyper-parameters are ranked on X_test/y_test, so the best score
# shown below is optimistically biased as an estimate of generalisation error --
# consider selecting on a validation split or with cross-validation on the training data.
eta = np.linspace(0.1,0.8,20)   # 20 evenly spaced learning rates in [0.1, 0.8]
n_est = [50,100,200]
m_dp = [None,3,5]               # None: trees are grown without a depth limit
records = []                    # one [learning rate, n_est, max depth, log loss] row per fit

for e in tqdm(eta):
    for n in n_est:
        for m in m_dp:
            # Fixed random_state (same seed as the train/test split) so that the
            # ranking is reproducible across kernel restarts; without it each run
            # of this cell can produce different scores.
            gbc = GradientBoostingClassifier(learning_rate=e, n_estimators=n,
                                             max_depth=m, random_state=25)
            gbc.fit(X_train,y_train)
            y_pred_proba = gbc.predict_proba(X_test)
            records.append([e,n,m,log_loss(y_test,y_pred_proba)])

# Build the result table once from the collected rows; `scores` is only ever a DataFrame.
scores = pd.DataFrame(records, columns=['learning rate', 'n_est', 'max depth', 'log loss'])
scores.sort_values(by='log loss', ascending=True)

100%|██████████| 20/20 [01:08<00:00,  3.43s/it]


Unnamed: 0,learning rate,n_est,max depth,log loss
10,0.136842,50,3.0,0.443439
1,0.100000,50,3.0,0.453560
4,0.100000,100,3.0,0.454564
73,0.394737,50,3.0,0.513560
19,0.173684,50,3.0,0.531296
...,...,...,...,...
81,0.431579,50,,4.026044
162,0.763158,50,,4.026939
174,0.800000,100,,4.112179
177,0.800000,200,,4.218453
