In [61]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split,StratifiedKFold,GridSearchCV
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import log_loss,accuracy_score
from sklearn.ensemble import RandomForestClassifier,StackingClassifier

import warnings
warnings.filterwarnings('ignore')

In [11]:
glass = pd.read_csv('Glass.csv')

In [12]:
glass.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,building_windows_float_processed
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,building_windows_float_processed
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,building_windows_float_processed
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,building_windows_float_processed
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,building_windows_float_processed


In [13]:
X = glass.drop('Type',axis=1)
y = glass['Type']

In [26]:
X_train , X_test , y_train , y_test = train_test_split(X,y,random_state=24,test_size=0.3,stratify=y)

In [39]:
knn = KNeighborsClassifier()
nb = GaussianNB()
dtc = DecisionTreeClassifier(random_state=24)
svm = SVC(probability=True,random_state=24)
lr = LogisticRegression(random_state=24)
rf = RandomForestClassifier(random_state=24)
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('TREE',dtc),('SVM',svm)],final_estimator=lr)

In [28]:
stack.fit(X_train,y_train)
y_pred = stack.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.7230769230769231


In [29]:
y_pred_prob = stack.predict_proba(X_test)
print(log_loss(y_test,y_pred_prob))

0.726108562583823


#### Using passthrough=True option

In [30]:
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('TREE',dtc),('SVM',svm)],final_estimator=lr,passthrough=True)

In [44]:
stack.fit(X_train,y_train)
y_pred = stack.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.7076923076923077


In [45]:
y_pred_prob = stack.predict_proba(X_test)
print(log_loss(y_test,y_pred_prob))

1.2222517693776007


#### using final estimator as rf

In [49]:
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('TREE',dtc),('SVM',svm)],final_estimator=rf)

In [50]:
stack.fit(X_train,y_train)
y_pred = stack.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.7076923076923077


In [51]:
y_pred_prob = stack.predict_proba(X_test)
print(log_loss(y_test,y_pred_prob))

1.252898468303256


#### Results mostly depends on the final_estimator and fractionally on the remaining 
#### choose that as the final estimator which performs the best

#### using GridSearchCV

In [59]:
kfold = StratifiedKFold(n_splits=5,shuffle=True,random_state=24)
rf = RandomForestClassifier(n_estimators=10,random_state=24)
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('TREE',dtc),('SVM',svm)],final_estimator=rf)

In [62]:
params = {'final_estimator__max_depth':[3,4,5],
         'SVM__C':np.linspace(0.001,3,5),
         'TREE__max_depth':[None,2,4],
         'final_estimator__n_estimators':[10,50],
         'passthrough':[True,False]}
gcv = GridSearchCV(stack,param_grid=params,scoring='neg_log_loss',verbose=3)
gcv.fit(X,y)

Fitting 5 folds for each of 180 candidates, totalling 900 fits
[CV 1/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=3, final_estimator__n_estimators=10, passthrough=True;, score=-1.264 total time=   0.1s
[CV 2/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=3, final_estimator__n_estimators=10, passthrough=True;, score=-0.925 total time=   0.1s
[CV 3/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=3, final_estimator__n_estimators=10, passthrough=True;, score=-1.081 total time=   0.1s
[CV 4/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=3, final_estimator__n_estimators=10, passthrough=True;, score=-0.991 total time=   0.1s
[CV 5/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=3, final_estimator__n_estimators=10, passthrough=True;, score=-0.931 total time=   0.1s
[CV 1/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=3, final_estimator__n_estimators=10, pass

In [63]:
gcv.best_params_

{'SVM__C': 0.75075,
 'TREE__max_depth': 4,
 'final_estimator__max_depth': 5,
 'final_estimator__n_estimators': 50,
 'passthrough': True}

In [64]:
gcv.best_score_

-0.9316846592703227