In [3]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.metrics import f1_score, classification_report, log_loss
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from xgboost import XGBClassifier


In [4]:
# Load the Sonar data; every column except 'Class' is used as a feature.
df = pd.read_csv('../Datasets/cases/Sonar/Sonar.csv')
X = df.drop(columns='Class')

# Encode the target labels as integers; `le` is kept so the original
# labels can be recovered later with le.inverse_transform.
le = LabelEncoder()
y = le.fit_transform(df['Class'])

# 70/30 hold-out split, stratified on the encoded target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=25,
    stratify=y,
)


In [8]:
# Manual grid search over XGBoost learning rate, number of trees and tree depth.
rates = np.linspace(0.01, 0.8, 20)
n_est = np.arange(50, 200, 30)
depths = [None, 2, 3, 5, 7]  # None leaves max_depth unset on the classifier
score_columns = ['learning_rate', 'n_estimators', 'max_depth', 'log_loss', 'f1']
scores = []

# NOTE(review): every candidate is scored on X_test, so the best rows are an
# optimistic estimate of generalisation - confirm whether a separate
# validation split (or cross-validation on the training data) is wanted.
for rate in tqdm(rates):  # the progress bar advances once per learning rate
    for n in n_est:
        for d in depths:
            params = {'n_estimators': n, 'max_depth': d, 'learning_rate': rate}
            gbm = XGBClassifier(random_state=25, **params)
            gbm.fit(X_train, y_train)

            # Probabilities feed log_loss; hard class predictions feed f1_score.
            y_pred_proba = gbm.predict_proba(X_test)
            y_pred = gbm.predict(X_test)
            test_log_loss = log_loss(y_test, y_pred_proba)
            test_f1 = f1_score(y_test, y_pred)

            scores.append([rate, n, d, test_log_loss, test_f1])

# One row per configuration; the cell displays them with the lowest log loss first.
scores_df = pd.DataFrame(scores, columns=score_columns)
scores_df.sort_values(by='log_loss')

100%|██████████| 20/20 [01:11<00:00,  3.59s/it]


Unnamed: 0,learning_rate,n_estimators,max_depth,log_loss,f1
179,0.301053,50,7.0,0.422442,0.769231
175,0.301053,50,,0.422442,0.769231
178,0.301053,50,5.0,0.422442,0.769231
131,0.217895,80,2.0,0.422918,0.734694
126,0.217895,50,2.0,0.428102,0.708333
...,...,...,...,...,...
431,0.716842,80,2.0,0.671560,0.652174
436,0.716842,110,2.0,0.671560,0.652174
441,0.716842,140,2.0,0.671560,0.652174
446,0.716842,170,2.0,0.671560,0.652174
