In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [2]:
df1 = df[df.RestingBP<=(df.RestingBP.mean()+3*df.RestingBP.std())]
df1.shape

(911, 12)

In [3]:
df2 = df1[df1.Cholesterol<=(df1.Cholesterol.mean()+3*df1.Cholesterol.std())]
df2.shape

(908, 12)

In [4]:
df3 = df2[df2.Oldpeak<=(df2.Oldpeak.mean()+3*df2.Oldpeak.std())]
df3.shape

(902, 12)

In [5]:
df4 = df3.copy()

df4.Sex.replace({
    'M':1,
    'F':0
},inplace=True)

df4.RestingECG.replace({
    'Normal':1,
    'ST':2,
    'LVH':3
},inplace=True)

df4.ExerciseAngina.replace({
    'N':0,
    'Y':1
},inplace=True)

df4.ST_Slope.replace({
    'Up':3,
    'Flat':2,
    'Down':1
},inplace=True)

df4.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,1,ATA,140,289,0,1,172,0,0.0,3,0
1,49,0,NAP,160,180,0,1,156,0,1.0,2,1
2,37,1,ATA,130,283,0,2,98,0,0.0,3,0
3,48,0,ASY,138,214,0,1,108,1,1.5,2,1
4,54,1,NAP,150,195,0,1,122,0,0.0,3,0


In [6]:
df5 = pd.get_dummies(df4,drop_first=True)
df5.head()

Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA
0,40,1,140,289,0,1,172,0,0.0,3,0,1,0,0
1,49,0,160,180,0,1,156,0,1.0,2,1,0,1,0
2,37,1,130,283,0,2,98,0,0.0,3,0,1,0,0
3,48,0,138,214,0,1,108,1,1.5,2,1,0,0,0
4,54,1,150,195,0,1,122,0,0.0,3,0,0,1,0


In [7]:
x = df5.drop('HeartDisease',axis='columns')
y= df5.HeartDisease

In [8]:
from sklearn.decomposition import PCA

pca = PCA(0.95)
x_pca = pca.fit_transform(x)
x_pca.shape

(902, 2)

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

In [10]:
model_params = {
    
    'LogisticRegression' : {
        'model' : LogisticRegression(solver='liblinear',multi_class='auto'),
        'params' : {
            'C':[1,5,10]
        }
    },
    
    'Tree' : {
        'model' : DecisionTreeClassifier(),
        'params' : {
            'criterion':['gini','entropy']
        }
    },
    
    'SVM' : {
        'model' : SVC(gamma='auto'),
        'params' : {
            'C':[1,10,20],
            'kernel':['rbf','linear']
        }
    },
    
    'Random_Forest' : {
        'model' : RandomForestClassifier(),
        'params' : {
            'n_estimators': [1,5,10,100]
        }
    },
    
    'KNN':{
        'model' : KNeighborsClassifier(),
        'params' : {
            'n_neighbors' : [1,2,3,4,5,6,7,8,9,10]
        }
    }
    
}

In [11]:
from sklearn.model_selection import GridSearchCV
scores=[]

for model_name, mp in model_params.items():
    clf = GridSearchCV(mp['model'],mp['params'],cv=5,return_train_score=False)
    clf.fit(x_pca,y)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    
score_df = pd.DataFrame(scores)
score_df

Unnamed: 0,model,best_score,best_params
0,LogisticRegression,0.651952,{'C': 5}
1,Tree,0.60868,{'criterion': 'gini'}
2,SVM,0.661915,"{'C': 10, 'kernel': 'linear'}"
3,Random_Forest,0.649705,{'n_estimators': 100}
4,KNN,0.66849,{'n_neighbors': 9}
