In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import chi2, SelectFdr, SelectKBest, SelectPercentile
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression

In [2]:
#train_test split
def train_test(Indep, dep, test_size=0.30, random_state=0):
    """Split the data into train/test partitions and standardise the features.

    The scaler is fitted on the training partition only and then applied to
    the test partition, so test rows do not contribute to the scaling
    statistics.

    Parameters
    ----------
    Indep : array-like
        Feature matrix handed to ``train_test_split``.
    dep : array-like
        Target values aligned row-for-row with ``Indep``.
    test_size : float or int, default 0.30
        Forwarded to ``train_test_split``; the default keeps the original
        70/30 split.
    random_state : int or None, default 0
        Forwarded to ``train_test_split``; the default keeps the original
        fixed shuffle.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` where the two feature arrays
        are the ``StandardScaler`` outputs and the targets are returned as
        produced by the split.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        Indep, dep, test_size=test_size, random_state=random_state
    )
    scaler = StandardScaler()
    # Fit on train only; reuse the same fitted scaler for test.
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    return x_train, x_test, y_train, y_test

In [3]:
#cm score
def CM(classifier,x_test,y_test):
    """Score a fitted classifier on a held-out split.

    Parameters
    ----------
    classifier : object
        Any object exposing ``predict(x_test)``.
    x_test, y_test
        Held-out features and their true labels.

    Returns
    -------
    tuple
        ``(confusion_matrix, classification_report, accuracy, x_test, y_test)``;
        the last two items are the inputs passed straight back to the caller.
    """
    predictions = classifier.predict(x_test)
    return (
        confusion_matrix(y_test, predictions),
        classification_report(y_test, predictions),
        accuracy_score(y_test, predictions),
        x_test,
        y_test,
    )

In [4]:
#feature Selection
def selectkbest(Indep,dep,n):
    """Keep the ``n`` highest-scoring features according to the chi-squared test.

    Parameters
    ----------
    Indep : array-like
        Candidate features (the selector is fitted on exactly what is passed).
    dep : array-like
        Target values used to score the features.
    n : int or "all"
        Forwarded as ``k`` to ``SelectKBest``.

    Returns
    -------
    array
        ``Indep`` reduced to the selected columns.
    """
    selector = SelectKBest(score_func=chi2, k=n)
    return selector.fit_transform(Indep, dep)
    
def fdr(Indep,dep):
    """Select features with ``SelectFdr`` scored by the chi-squared test.

    The selector is created with its default settings apart from the score
    function, fitted on ``Indep``/``dep``, and used to reduce ``Indep``.

    Returns
    -------
    array
        ``Indep`` reduced to the selected columns.
    """
    selector = SelectFdr(score_func=chi2)
    selector.fit(Indep, dep)
    return selector.transform(Indep)
    
def percentile(Indep,dep):
    """Select features with ``SelectPercentile`` scored by the chi-squared test.

    The selector is created with its default percentile, fitted on
    ``Indep``/``dep``, and used to reduce ``Indep``.

    Returns
    -------
    array
        ``Indep`` reduced to the selected columns.
    """
    selector = SelectPercentile(score_func=chi2)
    selector.fit(Indep, dep)
    return selector.transform(Indep)

In [5]:
#LogisticRegression Model
def logistic(x_train,y_train,x_test,y_test):
    """Fit logistic regression (lbfgs, up to 200 iterations) and evaluate it.

    Returns
    -------
    tuple
        Whatever ``CM`` returns for the fitted model:
        ``(cm, cr, Accuracy, x_test, y_test)``.
    """
    model = LogisticRegression(solver='lbfgs', max_iter=200)
    model.fit(x_train, y_train)
    return CM(model, x_test, y_test)

In [6]:
#RandomForestClassifier Model
def rf(x_train,y_train,x_test,y_test):
    """Fit a 10-tree entropy random forest (seed 0) and evaluate it.

    Returns
    -------
    tuple
        Whatever ``CM`` returns for the fitted model:
        ``(cm, cr, Accuracy, x_test, y_test)``.
    """
    model = RandomForestClassifier(
        n_estimators=10,
        criterion='entropy',
        random_state=0,
    )
    model.fit(x_train, y_train)
    return CM(model, x_test, y_test)

In [7]:
#GaussianNB Model
def gaus(x_train,y_train,x_test,y_test):
    """Fit a Gaussian naive Bayes classifier with default settings and evaluate it.

    Returns
    -------
    tuple
        Whatever ``CM`` returns for the fitted model:
        ``(cm, cr, Accuracy, x_test, y_test)``.
    """
    model = GaussianNB()
    model.fit(x_train, y_train)
    return CM(model, x_test, y_test)

In [8]:
#KNeighborsClassifier Model
def KN(x_train,y_train,x_test,y_test):
    """Fit a k-nearest-neighbours classifier with default settings and evaluate it.

    Returns
    -------
    tuple
        Whatever ``CM`` returns for the fitted model:
        ``(cm, cr, Accuracy, x_test, y_test)``.
    """
    model = KNeighborsClassifier()
    model.fit(x_train, y_train)
    return CM(model, x_test, y_test)

In [9]:
#DecisionTreeClassifier Model
def DT(x_train,y_train,x_test,y_test):
    """Fit a gini decision tree (sqrt feature subsampling, seed 0) and evaluate it.

    Returns
    -------
    tuple
        Whatever ``CM`` returns for the fitted model:
        ``(cm, cr, Accuracy, x_test, y_test)``.
    """
    model = DecisionTreeClassifier(
        criterion='gini',
        max_features='sqrt',
        splitter='best',
        random_state=0,
    )
    model.fit(x_train, y_train)
    return CM(model, x_test, y_test)

In [10]:
#SVC Model
def SVCM(x_train,y_train,x_test,y_test):
    """Fit a linear-kernel support vector classifier (seed 0) and evaluate it.

    Returns
    -------
    tuple
        Whatever ``CM`` returns for the fitted model:
        ``(cm, cr, Accuracy, x_test, y_test)``.
    """
    model = SVC(kernel='linear', random_state=0)
    model.fit(x_train, y_train)
    return CM(model, x_test, y_test)

In [11]:
# Load the raw table; `ds` starts out bound to the very same DataFrame object
# as `dataset` (no copy is made here).
ds = dataset = pd.read_csv("Heart_Disease_Prediction.csv")

In [12]:
# Dummy-encode the frame, dropping the first level of every encoded column.
# `ds` is rebound to the encoded result; `dataset` still refers to the raw table.
ds = pd.get_dummies(data=ds, drop_first=True)

In [13]:
# Target is the 'Heart Disease_Presence' column; every other column is a predictor.
dep = ds['Heart Disease_Presence']
Indep = ds.drop(columns='Heart Disease_Presence')


In [14]:
# One independent, initially empty accuracy list per classifier for the SelectFdr run.
log_l, rfm_l, guasm_l, kn_l, dtm_l, svcm_l = ([] for _ in range(6))

In [15]:
#Table Creation
def FDR_classifier(log_l,rfm_l, guasm_l, kn_l, dtm_l, svcm_l):
    """Build the one-row accuracy table for the SelectFdr experiment.

    Each argument is a sequence of accuracies for one classifier. The table
    has a single row labelled ``'FDR'``, so only element 0 of every sequence
    is read; an empty sequence raises ``IndexError``.

    Returns
    -------
    pandas.DataFrame
        Row ``'FDR'`` with columns ``Logistic``, ``RandomForest``,
        ``Guassian``, ``KNeighbors``, ``DecisionTree`` and ``SVC``.
    """
    scores_by_model = {
        'Logistic': log_l,
        'RandomForest': rfm_l,
        'Guassian': guasm_l,
        'KNeighbors': kn_l,
        'DecisionTree': dtm_l,
        'SVC': svcm_l,
    }
    table = pd.DataFrame(index=['FDR'], columns=list(scores_by_model))
    for position, row_label in enumerate(table.index):
        for model_name, scores in scores_by_model.items():
            table.loc[row_label, model_name] = scores[position]
    return table

In [16]:
# Evaluate every classifier on the features kept by SelectFdr (chi-squared).
# NOTE(review): fdr() fits the selector on the full dataset before the split
# below, so test rows influence which features are kept. Consider fitting the
# selector on the training partition only.
feature1 = fdr(Indep, dep)

x_train, x_test, y_train, y_test = train_test(feature1, dep)

# Each accuracy list is overwritten in place instead of appended to.
# FDR_classifier reads only element 0, so appending on a re-run of this cell
# (without re-running the list-reset cell) would leave the table showing the
# stale first result.
for evaluate, scores in (
    (logistic, log_l),
    (rf, rfm_l),
    (gaus, guasm_l),
    (KN, kn_l),
    (DT, dtm_l),
    (SVCM, svcm_l),
):
    cm, cr, Accuracy, x_test, y_test = evaluate(x_train, y_train, x_test, y_test)
    scores[:] = [Accuracy]

result1 = FDR_classifier(log_l, rfm_l, guasm_l, kn_l, dtm_l, svcm_l)

In [17]:
# One independent, initially empty accuracy list per classifier for the SelectPercentile run.
log_p, rfm_p, guasm_p, kn_p, dtm_p, svcm_p = ([] for _ in range(6))

In [18]:
#Table Creation
def Percentile_classifier(log_p,rfm_p, guasm_p, kn_p, dtm_p, svcm_p):
    """Build the one-row accuracy table for the SelectPercentile experiment.

    Each argument is a sequence of accuracies for one classifier. The table
    has a single row labelled ``'Percentile'``, so only element 0 of every
    sequence is read; an empty sequence raises ``IndexError``.

    Returns
    -------
    pandas.DataFrame
        Row ``'Percentile'`` with columns ``Logistic``, ``RandomForest``,
        ``Guassian``, ``KNeighbors``, ``DecisionTree`` and ``SVC``.
    """
    scores_by_model = {
        'Logistic': log_p,
        'RandomForest': rfm_p,
        'Guassian': guasm_p,
        'KNeighbors': kn_p,
        'DecisionTree': dtm_p,
        'SVC': svcm_p,
    }
    table = pd.DataFrame(index=['Percentile'], columns=list(scores_by_model))
    for position, row_label in enumerate(table.index):
        for model_name, scores in scores_by_model.items():
            table.loc[row_label, model_name] = scores[position]
    return table

In [19]:
# Evaluate every classifier on the features kept by SelectPercentile (chi-squared).
# NOTE(review): percentile() fits the selector on the full dataset before the
# split below, so test rows influence which features are kept. Consider
# fitting the selector on the training partition only.
feature2 = percentile(Indep, dep)
x_train, x_test, y_train, y_test = train_test(feature2, dep)

# Each accuracy list is overwritten in place instead of appended to.
# Percentile_classifier reads only element 0, so appending on a re-run of this
# cell (without re-running the list-reset cell) would leave the table showing
# the stale first result.
for evaluate, scores in (
    (logistic, log_p),
    (rf, rfm_p),
    (gaus, guasm_p),
    (KN, kn_p),
    (DT, dtm_p),
    (SVCM, svcm_p),
):
    cm, cr, Accuracy, x_test, y_test = evaluate(x_train, y_train, x_test, y_test)
    scores[:] = [Accuracy]

result2 = Percentile_classifier(log_p, rfm_p, guasm_p, kn_p, dtm_p, svcm_p)

In [20]:
# Print both accuracy tables, one item per line, with a rule between them.
print(
    "SelectFDR Feature Selection:",
    result1,
    "---------------------------------",
    "SelectPercentile Feature Selection:",
    result2,
    sep="\n",
)


SelectFDR Feature Selection:
    Logistic RandomForest  Guassian KNeighbors DecisionTree       SVC
FDR  0.82716     0.765432  0.790123   0.765432     0.641975  0.839506
---------------------------------
SelectPercentile Feature Selection:
            Logistic RandomForest  Guassian KNeighbors DecisionTree       SVC
Percentile  0.716049     0.691358  0.728395   0.641975     0.666667  0.740741


In [39]:
# One independent, initially empty accuracy list per classifier for the SelectKBest run.
log, rfm, guasm, kn, dtm, svcm = ([] for _ in range(6))

In [40]:
#Table Creation
def selectk_classifier(log,rfm, guasm, kn, dtm, svcm):
    """Build the one-row accuracy table for the SelectKBest experiment.

    Each argument is a sequence of accuracies for one classifier. The table
    has a single row labelled ``'kbest'``, so only element 0 of every
    sequence is read; an empty sequence raises ``IndexError``.

    Returns
    -------
    pandas.DataFrame
        Row ``'kbest'`` with columns ``Logistic``, ``RandomForest``,
        ``Guassian``, ``KNeighbors``, ``DecisionTree`` and ``SVC``.
    """
    scores_by_model = {
        'Logistic': log,
        'RandomForest': rfm,
        'Guassian': guasm,
        'KNeighbors': kn,
        'DecisionTree': dtm,
        'SVC': svcm,
    }
    table = pd.DataFrame(index=['kbest'], columns=list(scores_by_model))
    for position, row_label in enumerate(table.index):
        for model_name, scores in scores_by_model.items():
            table.loc[row_label, model_name] = scores[position]
    return table

In [41]:
# Reduce the predictors to the 8 best chi-squared features.
# NOTE(review): the outputs further down labelled #4/#5/#6 were presumably
# produced by editing this k and re-running; confirm before relying on them.
kbest = selectkbest(Indep, dep, n=8)

In [42]:
# Evaluate every classifier on the SelectKBest features held in `kbest`.
# NOTE(review): `kbest` was selected using the full dataset before the split
# below, so test rows influence which features are kept. Consider fitting the
# selector on the training partition only.
x_train, x_test, y_train, y_test = train_test(kbest, dep)

# Each accuracy list is overwritten in place instead of appended to.
# selectk_classifier reads only element 0, so appending on a re-run of this
# cell (e.g. after changing k, without re-running the list-reset cell) would
# leave the table showing the stale first result.
for evaluate, scores in (
    (logistic, log),
    (rf, rfm),
    (gaus, guasm),
    (KN, kn),
    (DT, dtm),
    (SVCM, svcm),
):
    cm, cr, Accuracy, x_test, y_test = evaluate(x_train, y_train, x_test, y_test)
    scores[:] = [Accuracy]

result = selectk_classifier(log, rfm, guasm, kn, dtm, svcm)

In [25]:
# SelectKBest accuracy table, presumably recorded with k=4.
# NOTE(review): the selection cell in this notebook uses k=8, so this output
# cannot be reproduced by Restart & Run All — confirm or regenerate.
result

Unnamed: 0,Logistic,RandomForest,Guassian,KNeighbors,DecisionTree,SVC
kbest,0.802469,0.753086,0.777778,0.802469,0.740741,0.802469


In [33]:
# SelectKBest accuracy table, presumably recorded with k=5.
# NOTE(review): the selection cell in this notebook uses k=8, so this output
# cannot be reproduced by Restart & Run All — confirm or regenerate.
result

Unnamed: 0,Logistic,RandomForest,Guassian,KNeighbors,DecisionTree,SVC
kbest,0.802469,0.790123,0.777778,0.82716,0.716049,0.802469


In [38]:
# SelectKBest accuracy table, presumably recorded with k=6.
# NOTE(review): the selection cell in this notebook uses k=8, so this output
# cannot be reproduced by Restart & Run All — confirm or regenerate.
result

Unnamed: 0,Logistic,RandomForest,Guassian,KNeighbors,DecisionTree,SVC
kbest,0.814815,0.740741,0.765432,0.814815,0.679012,0.814815


In [43]:
# SelectKBest accuracy table, presumably for k=8 (the value used in the
# selection cell above).
result

Unnamed: 0,Logistic,RandomForest,Guassian,KNeighbors,DecisionTree,SVC
kbest,0.802469,0.740741,0.765432,0.814815,0.728395,0.790123
