In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, KFold
from sklearn.metrics import accuracy_score, log_loss
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.svm import SVC, SVR
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.pipeline import Pipeline
from sklearn.ensemble import BaseEnsemble, VotingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import warnings
warnings.filterwarnings('ignore')

In [10]:
sonar = pd.read_csv(r"..\Cases\Sonar\Sonar.csv")
# y = sonar['Class']
le = LabelEncoder()
y = le.fit_transform(sonar['Class'])
X = sonar.drop('Class', axis=1)
print(le.classes_)

['M' 'R']


In [11]:
lgbm = LGBMClassifier(random_state=24)

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=24,stratify=y)

In [13]:
lgbm.fit(X_train, y_train)

y_pred = lgbm.predict(X_test)
y_pred

array([0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
       1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0])

In [14]:
accuracy_score(y_test, y_pred)

0.7777777777777778

In [15]:
y_pred_proba = lgbm.predict_proba(X_test)
print(y_pred_proba)
print(log_loss(y_test, y_pred_proba))

[[9.70615732e-01 2.93842684e-02]
 [9.06990084e-01 9.30099163e-02]
 [7.64385029e-01 2.35614971e-01]
 [1.02527374e-01 8.97472626e-01]
 [9.91577048e-01 8.42295224e-03]
 [9.96284872e-01 3.71512844e-03]
 [5.40549123e-03 9.94594509e-01]
 [9.80764684e-01 1.92353163e-02]
 [6.48429626e-02 9.35157037e-01]
 [3.35695991e-04 9.99664304e-01]
 [9.55243389e-01 4.47566115e-02]
 [9.72485631e-01 2.75143687e-02]
 [9.97489053e-01 2.51094667e-03]
 [1.06702371e-03 9.98932976e-01]
 [2.49639934e-01 7.50360066e-01]
 [5.28557649e-01 4.71442351e-01]
 [9.99952927e-01 4.70731065e-05]
 [2.51883065e-01 7.48116935e-01]
 [9.59107599e-01 4.08924005e-02]
 [9.76051273e-01 2.39487273e-02]
 [4.17538629e-04 9.99582461e-01]
 [4.82522308e-01 5.17477692e-01]
 [5.92057905e-03 9.94079421e-01]
 [7.26397072e-01 2.73602928e-01]
 [8.46803170e-01 1.53196830e-01]
 [1.30641859e-02 9.86935814e-01]
 [2.43472552e-01 7.56527448e-01]
 [3.75380998e-03 9.96246190e-01]
 [9.99960506e-01 3.94937361e-05]
 [8.52990298e-01 1.47009702e-01]
 [9.994350

In [16]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
print(lgbm.get_params())

{'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 1.0, 'importance_type': 'split', 'learning_rate': 0.1, 'max_depth': -1, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 100, 'n_jobs': None, 'num_leaves': 31, 'objective': None, 'random_state': 24, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'subsample': 1.0, 'subsample_for_bin': 200000, 'subsample_freq': 0}


In [17]:
params = {'learning_rate': np.linspace(0.001, 0.9,10), 
          'max_depth': [None,3,2,4], 
          'n_estimators':[25, 50 ,100],  
          }
gcv = GridSearchCV(lgbm, param_grid=params, cv=kfold, 
                   scoring='neg_log_loss')
gcv.fit(X, y)

print(gcv.best_score_)
print(gcv.best_params_)

-0.34998617816974675
{'learning_rate': 0.10088888888888889, 'max_depth': 4, 'n_estimators': 50}


In [18]:
pd_cv = pd.DataFrame(gcv.cv_results_)
pd_cv

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.018366,0.003181,0.004901,0.001142,0.001,,25,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.684938,-0.681833,-0.681419,-0.681052,-0.683472,-0.682543,0.001455,101
1,0.027512,0.007072,0.002185,0.001909,0.001,,50,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.678234,-0.672112,-0.674015,-0.671092,-0.676984,-0.674487,0.002745,95
2,0.037434,0.005948,0.003234,0.001619,0.001,,100,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.666027,-0.653635,-0.661182,-0.652818,-0.663665,-0.659465,0.005326,88
3,0.014333,0.001562,0.002512,0.002063,0.001,3,25,"{'learning_rate': 0.001, 'max_depth': 3, 'n_es...",-0.684913,-0.681833,-0.681437,-0.681011,-0.683499,-0.682539,0.001456,100
4,0.024646,0.006943,0.005289,0.003740,0.001,3,50,"{'learning_rate': 0.001, 'max_depth': 3, 'n_es...",-0.678166,-0.672112,-0.674023,-0.671111,-0.677021,-0.674486,0.002728,94
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,0.017608,0.001824,0.000000,0.000000,0.9,2,50,"{'learning_rate': 0.9, 'max_depth': 2, 'n_esti...",-0.587426,-0.643205,-0.830473,-0.603139,-0.737314,-0.680311,0.091387,99
116,0.028346,0.007981,0.003856,0.006324,0.9,2,100,"{'learning_rate': 0.9, 'max_depth': 2, 'n_esti...",-0.573037,-0.747193,-0.938728,-0.775346,-0.689624,-0.744786,0.119242,114
117,0.016330,0.002513,0.000341,0.000681,0.9,4,25,"{'learning_rate': 0.9, 'max_depth': 4, 'n_esti...",-0.565353,-0.609221,-0.778250,-0.382906,-0.735122,-0.614171,0.139601,75
118,0.017894,0.002590,0.008772,0.007541,0.9,4,50,"{'learning_rate': 0.9, 'max_depth': 4, 'n_esti...",-0.602054,-0.675622,-1.151118,-0.530898,-0.861974,-0.764333,0.222635,116
