In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split,StratifiedKFold,GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier,StackingClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,log_loss

In [2]:
bcancer=pd.read_csv('BreastCancer.csv',index_col=0)
lbl=LabelEncoder()
bcancer['Class']=lbl.fit_transform(bcancer['Class'])
X=bcancer.drop('Class',axis=1)
y=bcancer['Class']

In [3]:
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=23,test_size=0.3)

In [4]:
lr=LogisticRegression()
svm=SVC(kernel='linear',probability=True,random_state=23)
dtc=DecisionTreeClassifier(random_state=23)
rf=RandomForestClassifier(random_state=23)

In [5]:
#Without PassThrough
stack=StackingClassifier([('LR',lr),('SVM',svm),('TREE',dtc)],final_estimator=rf)
stack.fit(X_train,y_train) #Excludes all ops on train set
y_pred= stack.predict(X_test)   #excludes all operation on test set
print(accuracy_score(y_test,y_pred))

0.9333333333333333


In [6]:
#With PassThrough
stack=StackingClassifier([('LR',lr),('SVM',svm),('TREE',dtc)],final_estimator=rf,passthrough=True)
stack.fit(X_train,y_train) #Excludes all ops on train set
y_pred= stack.predict(X_test)   #excludes all operation on test set
print(accuracy_score(y_test,y_pred))

0.9571428571428572


In [7]:
################################### Other Models ###################################

In [8]:
gbm=GradientBoostingClassifier(random_state=23)
stack=StackingClassifier([('LR',lr),('SVM',svm),('TREE',dtc)],final_estimator=gbm,passthrough=True)
stack.fit(X_train,y_train) #Excludes all ops on train set
y_pred= stack.predict(X_test)   #excludes all operation on test set
print(accuracy_score(y_test,y_pred))

0.9523809523809523


In [9]:
#Grid search CV
gbm=GradientBoostingClassifier(random_state=23)
stack=StackingClassifier([('LR',lr),('SVM',svm),('TREE',dtc)],final_estimator=gbm,passthrough=True)
params={'TREE__max_depth':[2,3,4,5,6,None],
        'TREE__min_samples_split':[2,5,10],
        'TREE__min_samples_leaf':[1,3,5,7,10,15],
       'LR__penalty':['l1','l2',None],
       'SVM__C':np.linspace(0.001,3,5),
       'final_estimator__learning_rate': [0.1,0.2],
       'final_estimator__max_depth': [2,3,4,5,6,None]}
kfold=StratifiedKFold(n_splits=5,shuffle=True,random_state=23)
gcv_stack=GridSearchCV(stack,param_grid=params,cv=kfold,scoring='neg_log_loss',verbose=3)
gcv_stack.fit(X,y)
print(gcv_stack.best_params_)
print(gcv_stack.best_score_)

Fitting 5 folds for each of 19440 candidates, totalling 97200 fits
[CV 1/5] END LR__penalty=l1, SVM__C=0.001, TREE__max_depth=2, TREE__min_samples_leaf=1, TREE__min_samples_split=2, final_estimator__learning_rate=0.1, final_estimator__max_depth=2;, score=nan total time=   0.0s
[CV 2/5] END LR__penalty=l1, SVM__C=0.001, TREE__max_depth=2, TREE__min_samples_leaf=1, TREE__min_samples_split=2, final_estimator__learning_rate=0.1, final_estimator__max_depth=2;, score=nan total time=   0.0s
[CV 3/5] END LR__penalty=l1, SVM__C=0.001, TREE__max_depth=2, TREE__min_samples_leaf=1, TREE__min_samples_split=2, final_estimator__learning_rate=0.1, final_estimator__max_depth=2;, score=nan total time=   0.0s
[CV 4/5] END LR__penalty=l1, SVM__C=0.001, TREE__max_depth=2, TREE__min_samples_leaf=1, TREE__min_samples_split=2, final_estimator__learning_rate=0.1, final_estimator__max_depth=2;, score=nan total time=   0.0s
[CV 5/5] END LR__penalty=l1, SVM__C=0.001, TREE__max_depth=2, TREE__min_samples_leaf=1, T

In [None]:
print(stack.get_params())