In [1]:
import pandas as pd 
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import VotingClassifier


In [2]:
df=pd.read_csv("heart.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
X=df.iloc[:,:-1]
y=df.iloc[:,-1]

X=SimpleImputer(missing_values=np.nan,strategy='mean').fit_transform(X)
X=StandardScaler().fit_transform(X)
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.25)
x_train.shape,y_train.shape,x_test.shape,y_test.shape

((227, 13), (227,), (76, 13), (76,))

In [4]:
LR=LogisticRegression(penalty='l2',C=1,solver='lbfgs')#{'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}
LR.fit(x_train,y_train)
LR.score(x_train,y_train),LR.score(x_test,y_test)

(0.8678414096916299, 0.8421052631578947)

In [18]:
SGDLR=SGDClassifier(loss='log_loss',penalty='l1')#'l2', 'l1', 'elasticnet'|'hinge','log','modified_huber',squared_hinge,perceptron
SGDLR.fit(x_train,y_train)
SGDLR.score(x_train,y_train),SGDLR.score(x_test,y_test)

(0.8281938325991189, 0.8157894736842105)

In [20]:
SVM=SVC(C=1.0,kernel='linear',degree=3,gamma='scale',probability=True) #{'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}
SVM.fit(x_train,y_train)
SVM.score(x_train,y_train),SVM.score(x_test,y_test)

(0.8722466960352423, 0.8289473684210527)

In [7]:
MLPN=MLPClassifier(hidden_layer_sizes=100,activation='relu',solver='adam') 
MLPN.fit(x_train,y_train)
MLPN.score(x_train,y_train),MLPN.score(x_test,y_test)



(0.933920704845815, 0.8421052631578947)

In [8]:
DT=DecisionTreeClassifier(criterion='gini',splitter='best',max_depth=5)
DT.fit(x_train,y_train)
DT.score(x_train,y_train),DT.score(x_test,y_test)

(0.933920704845815, 0.7631578947368421)

In [9]:
RF=RandomForestClassifier(n_estimators=100,criterion='gini',max_depth=10)
RF.fit(x_train,y_train)
RF.score(x_train,y_train),RF.score(x_test,y_test)

(1.0, 0.8157894736842105)

In [10]:
GB=GradientBoostingClassifier(loss='deviance',learning_rate=0.05,n_estimators=8)
GB.fit(x_train,y_train)
GB.score(x_train,y_train),GB.score(x_test,y_test)



(0.8854625550660793, 0.7894736842105263)

In [11]:
# the heightest accurace i get is 90 %

In [15]:
from sklearn.calibration import CalibratedClassifierCV

In [25]:
VotingClassifierModel=VotingClassifier(estimators=[('LogisticRegressionModel',LR),('SGDClassifierModel',SGDLR),
                                                  ('SVCModel',SVM),('MLPClassifierModel',MLPN),
                                                  ('DecisionTreeClassifierModel',DT),('RandomForestClassifierModel',RF),
                                                  ('GradientBoostingClassifierModel',GB)],voting="soft")

VotingClassifierModel.fit(x_train,y_train)
calibrator = CalibratedClassifierCV(VotingClassifierModel, cv='prefit')
model=calibrator.fit(x_train, y_train)

VotingClassifierModel.fit(x_train,y_train)
VotingClassifierModel.score(x_train,y_train),VotingClassifierModel.score(x_test,y_test)



(0.933920704845815, 0.8157894736842105)

In [26]:
y_predict=VotingClassifierModel.predict(x_test)
y_predict_probability=VotingClassifierModel.predict_proba(x_test)

print("actual value ",y_test[10:20].values,"\n") 
print("predicted value ",y_predict[10:20],"\n") 
print("probability of predicted value ",y_predict_probability[10:20],"\n") 

actual value  [1 0 0 1 1 1 1 1 1 0] 

predicted value  [1 0 0 1 1 1 1 1 1 1] 

probability of predicted value  [[0.14201151 0.85798849]
 [0.92133047 0.07866953]
 [0.75236237 0.24763763]
 [0.12944803 0.87055197]
 [0.10033    0.89967   ]
 [0.12529565 0.87470435]
 [0.15577966 0.84422034]
 [0.08173993 0.91826007]
 [0.18315096 0.81684904]
 [0.25699929 0.74300071]] 

