In [35]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split,GridSearchCV,StratifiedKFold
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,roc_auc_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier,StackingClassifier
 

In [15]:
cancer = pd.read_csv('BreastCancer.csv').set_index('Code')

In [16]:
X = cancer.drop('Class',axis=1)
y = cancer['Class']

In [20]:
X_train , X_test , y_train , y_test = train_test_split(X,y,random_state=24,test_size=0.3)

In [18]:
knn = KNeighborsClassifier()
nb = GaussianNB()
dtc = DecisionTreeClassifier(random_state=24)
svm = SVC(probability=True,random_state=24)
lr = LogisticRegression(random_state=24)
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('TREE',dtc),('SVM',svm)],final_estimator=lr)

In [23]:
stack.fit(X_train,y_train)
y_pred = stack.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.9809523809523809


In [26]:
y_pred_prob = stack.predict_proba(X_test)[:,1]
print(roc_auc_score(y_test,y_pred_prob))

0.9988001199880011


#### Using passthrough=True option

In [27]:
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('TREE',dtc),('SVM',svm)],final_estimator=lr,passthrough=True)

In [28]:
stack.fit(X_train,y_train)
y_pred = stack.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.9761904761904762


In [29]:
y_pred_prob = stack.predict_proba(X_test)[:,1]
print(roc_auc_score(y_test,y_pred_prob))

0.9986001399860014


In [36]:
kfold = StratifiedKFold(n_splits=5,shuffle=True,random_state=24)
rf = RandomForestClassifier(n_estimators=10,random_state=24)
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('TREE',dtc),('SVM',svm)],final_estimator=rf)

In [40]:
params = {
         'SVM__C':np.linspace(0.001,3,5),
         'TREE__max_depth':[None,2],
         'passthrough':[True,False]}
gcv = GridSearchCV(stack,param_grid=params,scoring='neg_log_loss',verbose=3)
gcv.fit(X,y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END SVM__C=0.001, TREE__max_depth=None, passthrough=True;, score=-0.340 total time=   0.3s
[CV 2/5] END SVM__C=0.001, TREE__max_depth=None, passthrough=True;, score=-0.626 total time=   0.3s
[CV 3/5] END SVM__C=0.001, TREE__max_depth=None, passthrough=True;, score=-0.056 total time=   0.3s
[CV 4/5] END SVM__C=0.001, TREE__max_depth=None, passthrough=True;, score=-0.564 total time=   0.3s
[CV 5/5] END SVM__C=0.001, TREE__max_depth=None, passthrough=True;, score=-0.048 total time=   0.3s
[CV 1/5] END SVM__C=0.001, TREE__max_depth=None, passthrough=False;, score=-0.823 total time=   0.4s
[CV 2/5] END SVM__C=0.001, TREE__max_depth=None, passthrough=False;, score=-1.597 total time=   0.3s
[CV 3/5] END SVM__C=0.001, TREE__max_depth=None, passthrough=False;, score=-0.057 total time=   0.3s
[CV 4/5] END SVM__C=0.001, TREE__max_depth=None, passthrough=False;, score=-0.546 total time=   0.3s
[CV 5/5] END SVM__C=0.001, TREE__m

In [41]:
import pickle

In [42]:
best_stack = gcv.best_estimator_