In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

In [2]:
from sklearn.linear_model import LogisticRegression 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

In [3]:
df=pd.read_csv("heart1.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,77,1,0,125,304,0,0,162,1,0.0,2,3,2,0
1,76,0,2,140,197,0,2,116,0,1.1,1,0,2,1
2,76,0,2,140,197,0,2,116,0,1.1,1,0,2,1
3,76,0,2,140,197,0,2,116,0,1.1,1,0,2,1
4,74,0,1,120,269,0,0,121,1,0.2,2,1,2,1


In [4]:
cp=pd.get_dummies(df['cp'],prefix='cp',drop_first=True)
thal=pd.get_dummies(df['ca'],prefix='ca',drop_first=True)
slope=pd.get_dummies(df['slope'],prefix='slope',drop_first=True)

In [5]:
new_df=pd.concat([df,cp,thal,slope],axis=1)
new_df=new_df.drop(['cp','ca','slope'],axis=1)
new_df.head()

Unnamed: 0,age,sex,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,thal,target,cp_1,cp_2,cp_3,ca_1,ca_2,ca_3,ca_4,slope_1,slope_2
0,77,1,125,304,0,0,162,1,0.0,2,0,0,0,0,0,0,1,0,0,1
1,76,0,140,197,0,2,116,0,1.1,2,1,0,1,0,0,0,0,0,1,0
2,76,0,140,197,0,2,116,0,1.1,2,1,0,1,0,0,0,0,0,1,0
3,76,0,140,197,0,2,116,0,1.1,2,1,0,1,0,0,0,0,0,1,0
4,74,0,120,269,0,0,121,1,0.2,2,1,1,0,0,1,0,0,0,0,1


In [6]:
X = new_df.drop(["target","fbs","restecg","thal"],axis=1)
y = new_df['target']

In [7]:
#Split data into training and test data
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=2)

# Stacking CV Classifier

In [8]:
from mlxtend.classifier import StackingCVClassifier

In [9]:
clf1=DecisionTreeClassifier()
clf2=AdaBoostClassifier(learning_rate=1)
clf3=RandomForestClassifier(criterion='gini',bootstrap=False, max_depth=50, max_features=5,
                       n_estimators=10)
log=LogisticRegression(C=2)

In [30]:
sclf=StackingCVClassifier(classifiers=[clf1,clf2,clf3], meta_classifier=log,cv=10)

In [31]:
sclf.fit(X_train,y_train)

StackingCVClassifier(classifiers=[DecisionTreeClassifier(),
                                  AdaBoostClassifier(learning_rate=1),
                                  RandomForestClassifier(bootstrap=False,
                                                         max_depth=50,
                                                         max_features=5,
                                                         n_estimators=10)],
                     cv=10, meta_classifier=LogisticRegression(C=2))

In [32]:
predict=sclf.predict(X_test)

In [33]:
sclf.score(X_test,y_test)

0.9615384615384616

# Performance measure

In [34]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [35]:
final_accuracy=accuracy_score(predict,y_test)

In [36]:
final_confusion_matrix=confusion_matrix(predict,y_test)

In [37]:
print("\nStacked Generalization Technique")
print("Confusion Matrix")
print(final_confusion_matrix)
print("Accuracy: "+"{:.2%}".format(final_accuracy))


Stacked Generalization Technique
Confusion Matrix
[[ 33   2]
 [  4 117]]
Accuracy: 96.15%
