In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, KFold
from sklearn.metrics import accuracy_score, log_loss
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.svm import SVC, SVR
from sklearn.pipeline import Pipeline
from sklearn.ensemble import BaseEnsemble, VotingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import warnings
warnings.filterwarnings('ignore')

In [12]:
sonar = pd.read_csv(r"..\Cases\Sonar\Sonar.csv")
# y = sonar['Class']
le = LabelEncoder()
y = le.fit_transform(sonar['Class'])
X = sonar.drop('Class', axis=1)
print(le.classes_)

['M' 'R']


In [13]:
gbm = GradientBoostingClassifier(random_state=24)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=24,stratify=y)

In [15]:
gbm.fit(X_train, y_train)

y_pred = gbm.predict(X_test)
y_pred

array([0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0])

In [16]:
accuracy_score(y_test, y_pred)

0.746031746031746

In [17]:
y_pred_proba = gbm.predict_proba(X_test)
print(y_pred_proba)
print(log_loss(y_test, y_pred_proba))

[[6.60095988e-01 3.39904012e-01]
 [9.91188251e-01 8.81174921e-03]
 [9.61275926e-01 3.87240738e-02]
 [4.31469604e-01 5.68530396e-01]
 [9.96988713e-01 3.01128725e-03]
 [9.99116803e-01 8.83196826e-04]
 [2.50407401e-03 9.97495926e-01]
 [9.98396956e-01 1.60304377e-03]
 [1.29129269e-01 8.70870731e-01]
 [9.42331301e-05 9.99905767e-01]
 [9.26767270e-01 7.32327299e-02]
 [9.89457133e-01 1.05428666e-02]
 [9.87371283e-01 1.26287165e-02]
 [1.46885713e-04 9.99853114e-01]
 [2.10568822e-01 7.89431178e-01]
 [9.41790881e-01 5.82091186e-02]
 [9.99667330e-01 3.32670360e-04]
 [3.75877338e-01 6.24122662e-01]
 [9.80702403e-01 1.92975974e-02]
 [9.99282730e-01 7.17269945e-04]
 [2.53076849e-04 9.99746923e-01]
 [7.86938778e-01 2.13061222e-01]
 [1.14509235e-03 9.98854908e-01]
 [6.25117959e-01 3.74882041e-01]
 [8.30534864e-02 9.16946514e-01]
 [3.00700828e-03 9.96992992e-01]
 [4.48787374e-01 5.51212626e-01]
 [1.50614720e-03 9.98493853e-01]
 [9.99809960e-01 1.90040220e-04]
 [9.88934691e-01 1.10653087e-02]
 [9.996971

In [18]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
print(gbm.get_params())

{'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.1, 'loss': 'log_loss', 'max_depth': 3, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_iter_no_change': None, 'random_state': 24, 'subsample': 1.0, 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}


In [23]:
params = {'learning_rate': np.linspace(0.001, 0.9,10), 
          'max_depth': [None,2,3,4], 
          'n_estimators':[25, 50 ,100],  
          }
gcv = GridSearchCV(gbm, param_grid=params, cv=kfold, 
                   scoring='neg_log_loss', n_jobs=-1)
gcv.fit(X, y)

print(gcv.best_score_)
print(gcv.best_params_)

-0.3922954270189254
{'learning_rate': 0.10088888888888889, 'max_depth': 2, 'n_estimators': 100}


In [22]:
pd_cv = pd.DataFrame(gcv.cv_results_)
pd_cv

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.228392,0.036078,0.001648,0.002019,0.001,,25,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.684015,-0.682227,-0.679191,-0.679707,-0.678227,-0.680674,0.002131,62
1,0.433477,0.077239,0.006465,0.007926,0.001,,50,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.676433,-0.673571,-0.670027,-0.669309,-0.667235,-0.671315,0.003274,56
2,0.814855,0.126356,0.001411,0.001362,0.001,,100,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.663074,-0.656561,-0.654194,-0.648814,-0.646567,-0.653842,0.005847,49
3,0.089243,0.007519,0.000752,0.001505,0.001,2,25,"{'learning_rate': 0.001, 'max_depth': 2, 'n_es...",-0.687835,-0.687064,-0.681893,-0.684818,-0.682902,-0.684902,0.002295,66
4,0.173773,0.013808,0.006459,0.007918,0.001,2,50,"{'learning_rate': 0.001, 'max_depth': 2, 'n_es...",-0.682561,-0.682202,-0.675040,-0.679869,-0.676581,-0.679251,0.002997,61
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,0.237097,0.011462,0.003722,0.006062,0.9,3,50,"{'learning_rate': 0.9, 'max_depth': 3, 'n_esti...",-0.544435,-0.623741,-0.835967,-1.276308,-0.954733,-0.847037,0.259778,77
116,0.383736,0.018770,0.003223,0.006446,0.9,3,100,"{'learning_rate': 0.9, 'max_depth': 3, 'n_esti...",-0.690108,-0.857411,-1.135421,-1.512037,-1.005893,-1.040174,0.278873,91
117,0.158638,0.005518,0.000955,0.001230,0.9,4,25,"{'learning_rate': 0.9, 'max_depth': 4, 'n_esti...",-0.674957,-0.855593,-0.842496,-0.714288,-0.938986,-0.805264,0.097013,72
118,0.294782,0.012034,0.001568,0.001921,0.9,4,50,"{'learning_rate': 0.9, 'max_depth': 4, 'n_esti...",-0.591350,-1.112701,-0.756468,-1.108484,-1.205347,-0.954870,0.237805,83
