In [11]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split,StratifiedKFold,RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier,StackingClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,log_loss

In [12]:
bcancer=pd.read_csv('BreastCancer.csv',index_col=0)
lbl=LabelEncoder()
bcancer['Class']=lbl.fit_transform(bcancer['Class'])
X=bcancer.drop('Class',axis=1)
y=bcancer['Class']

In [13]:
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=23,test_size=0.3)

In [14]:
lr=LogisticRegression()
svm=SVC(kernel='linear',probability=True,random_state=23)
dtc=DecisionTreeClassifier(random_state=23)
rf=RandomForestClassifier(random_state=23)

In [15]:
#Without PassThrough
stack=StackingClassifier([('LR',lr),('SVM',svm),('TREE',dtc)],final_estimator=rf)
stack.fit(X_train,y_train) #Excludes all ops on train set
y_pred= stack.predict(X_test)   #excludes all operation on test set
print(accuracy_score(y_test,y_pred))

0.9333333333333333


In [16]:
#With PassThrough
stack=StackingClassifier([('LR',lr),('SVM',svm),('TREE',dtc)],final_estimator=rf,passthrough=True)
stack.fit(X_train,y_train) #Excludes all ops on train set
y_pred= stack.predict(X_test)   #excludes all operation on test set
print(accuracy_score(y_test,y_pred))

0.9571428571428572


In [17]:
################################### Other Models ###################################

In [18]:
gbm=GradientBoostingClassifier(random_state=23)
stack=StackingClassifier([('LR',lr),('SVM',svm),('TREE',dtc)],final_estimator=gbm,passthrough=True)
stack.fit(X_train,y_train) #Excludes all ops on train set
y_pred= stack.predict(X_test)   #excludes all operation on test set
print(accuracy_score(y_test,y_pred))

0.9523809523809523


In [19]:
#Grid search CV
gbm=GradientBoostingClassifier(random_state=23)
stack=StackingClassifier([('LR',lr),('SVM',svm),('TREE',dtc)],final_estimator=gbm,passthrough=True)
params={'TREE__max_depth':[2,3,4,5,6,None],
        'TREE__min_samples_split':[2,5,10],
        'TREE__min_samples_leaf':[1,3,5,7,10,15],
       'LR__penalty':['l1','l2',None],
       'SVM__C':np.linspace(0.001,3,5),}
kfold=StratifiedKFold(n_splits=5,shuffle=True,random_state=23)
gcv_stack=RandomizedSearchCV(stack,param_distributions=params,cv=kfold,scoring='neg_log_loss',verbose=3,n_iter=50)
gcv_stack.fit(X,y)
print(gcv_stack.best_params_)
print(gcv_stack.best_score_)

Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV 1/5] END LR__penalty=l2, SVM__C=0.75075, TREE__max_depth=4, TREE__min_samples_leaf=7, TREE__min_samples_split=10;, score=-0.184 total time=   1.1s
[CV 2/5] END LR__penalty=l2, SVM__C=0.75075, TREE__max_depth=4, TREE__min_samples_leaf=7, TREE__min_samples_split=10;, score=-0.114 total time=   1.0s
[CV 3/5] END LR__penalty=l2, SVM__C=0.75075, TREE__max_depth=4, TREE__min_samples_leaf=7, TREE__min_samples_split=10;, score=-0.050 total time=   1.0s
[CV 4/5] END LR__penalty=l2, SVM__C=0.75075, TREE__max_depth=4, TREE__min_samples_leaf=7, TREE__min_samples_split=10;, score=-0.185 total time=   1.0s
[CV 5/5] END LR__penalty=l2, SVM__C=0.75075, TREE__max_depth=4, TREE__min_samples_leaf=7, TREE__min_samples_split=10;, score=-0.260 total time=   0.7s
[CV 1/5] END LR__penalty=None, SVM__C=0.001, TREE__max_depth=None, TREE__min_samples_leaf=1, TREE__min_samples_split=5;, score=-0.176 total time=   0.8s
[CV 2/5] END LR__penalty=None,

70 fits failed out of a total of 250.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/ensemble/_stacking.py", line 658, in fit
    return super().fit(X, y_encoded, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  

{'TREE__min_samples_split': 5, 'TREE__min_samples_leaf': 15, 'TREE__max_depth': 4, 'SVM__C': 1.5005, 'LR__penalty': None}
-0.13839263775451607


In [None]:
print(stack.get_params())