In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split,StratifiedKFold,GridSearchCV
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import log_loss,accuracy_score
from sklearn.ensemble import RandomForestClassifier,StackingClassifier

import warnings
warnings.filterwarnings('ignore')

In [5]:
satellite = pd.read_csv('Satellite.csv',sep=';')

In [6]:
satellite.head()

Unnamed: 0,x.1,x.2,x.3,x.4,x.5,x.6,x.7,x.8,x.9,x.10,...,x.28,x.29,x.30,x.31,x.32,x.33,x.34,x.35,x.36,classes
0,92,115,120,94,84,102,106,79,84,102,...,104,88,121,128,100,84,107,113,87,grey soil
1,84,102,106,79,84,102,102,83,80,102,...,100,84,107,113,87,84,99,104,79,grey soil
2,84,102,102,83,80,102,102,79,84,94,...,87,84,99,104,79,84,99,104,79,grey soil
3,80,102,102,79,84,94,102,79,80,94,...,79,84,99,104,79,84,103,104,79,grey soil
4,84,94,102,79,80,94,98,76,80,102,...,79,84,103,104,79,79,107,109,87,grey soil


In [7]:
X = satellite.drop('classes',axis=1)
y = satellite['classes']

In [9]:
X_train , X_test , y_train , y_test = train_test_split(X,y,random_state=24,test_size=0.3,stratify=y)

In [10]:
knn = KNeighborsClassifier()
nb = GaussianNB()
dtc = DecisionTreeClassifier(random_state=24)
svm = SVC(probability=True,random_state=24)
lr = LogisticRegression(random_state=24)
rf = RandomForestClassifier(random_state=24)
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('TREE',dtc),('SVM',svm)],final_estimator=lr)

In [11]:
stack.fit(X_train,y_train)
y_pred = stack.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.905748316934231


In [12]:
y_pred_prob = stack.predict_proba(X_test)
print(log_loss(y_test,y_pred_prob))

0.2709870109450135


#### Using passthrough=True option

In [13]:
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('TREE',dtc),('SVM',svm)],final_estimator=lr,passthrough=True)

In [14]:
stack.fit(X_train,y_train)
y_pred = stack.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.7928534438114966


In [15]:
y_pred_prob = stack.predict_proba(X_test)
print(log_loss(y_test,y_pred_prob))

0.5362448607568437


#### using final estimator as rf

In [16]:
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('TREE',dtc),('SVM',svm)],final_estimator=rf)

In [17]:
stack.fit(X_train,y_train)
y_pred = stack.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.905748316934231


In [18]:
y_pred_prob = stack.predict_proba(X_test)
print(log_loss(y_test,y_pred_prob))

0.3357089071730576


#### using GridSearchCV

In [19]:
kfold = StratifiedKFold(n_splits=5,shuffle=True,random_state=24)
rf = RandomForestClassifier(n_estimators=10,random_state=24)
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('TREE',dtc),('SVM',svm)],final_estimator=rf)

In [21]:
params = {'final_estimator__max_depth':[2],
         'SVM__C':np.linspace(0.001,3,5),
         'TREE__max_depth':[None,2],
         'passthrough':[True,False]}
gcv = GridSearchCV(stack,param_grid=params,scoring='neg_log_loss',verbose=3)
gcv.fit(X,y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=2, passthrough=True;, score=-0.857 total time= 1.2min
[CV 2/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=2, passthrough=True;, score=-0.852 total time= 1.2min
[CV 3/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=2, passthrough=True;, score=-0.953 total time= 1.2min
[CV 4/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=2, passthrough=True;, score=-0.875 total time= 1.2min
[CV 5/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=2, passthrough=True;, score=-0.875 total time= 1.2min
[CV 1/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=2, passthrough=False;, score=-0.861 total time= 1.2min
[CV 2/5] END SVM__C=0.001, TREE__max_depth=None, final_estimator__max_depth=2, passthrough=False;, score=-0.850 total time= 1.2min
[CV 3/5] END SVM__C=0.001,

In [22]:
gcv.best_params_

{'SVM__C': 3.0,
 'TREE__max_depth': None,
 'final_estimator__max_depth': 2,
 'passthrough': False}

In [23]:
gcv.best_score_

-0.8250715217991876