In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import chi2, RFE, SelectKBest
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.decomposition import PCA, KernelPCA
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression

In [2]:
#train_test split
def train_test(Indep, dep):
    x_train,x_test,y_train,y_test= train_test_split(Indep, dep, test_size=0.30, random_state=0)
    sc = StandardScaler()
    x_train=sc.fit_transform(x_train)
    x_test=sc.transform(x_test)
    return x_train,x_test, y_train,y_test 

In [3]:
#cm score
def CM(classifier,x_test,y_test):
    y_pred=classifier.predict(x_test)
    cm=confusion_matrix(y_test,y_pred)
    cr=classification_report(y_test,y_pred)
    Accuracy=accuracy_score(y_test, y_pred )
    return cm, cr, Accuracy, x_test, y_test

In [4]:
#feature Selection
def rfe_f(Indep,dep,n):
    feature1 = []
    log=LogisticRegression(solver='lbfgs', max_iter=200)
    rf=RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
    dt=DecisionTreeClassifier(criterion = 'gini', max_features='sqrt',splitter='best',random_state = 0)
    sv=SVC(kernel = 'linear', random_state = 0)

    rfemodel=[log,rf,dt,sv]
    
    for i in rfemodel:
        print (i)
        best = RFE(estimator=i, n_features_to_select=n)
        fit1=best.fit(Indep,dep)
        feature=fit1.transform(Indep)
        feature1.append(feature)
    return feature1

In [5]:
#LogisticRegression Model
def logistic(x_train,y_train,x_test,y_test):       
    classifier = LogisticRegression(solver='lbfgs', max_iter=200)
    classifier.fit(x_train,y_train)
    cm, cr, Accuracy, x_test, y_test=CM(classifier,x_test,y_test)
    return cm, cr, Accuracy, x_test, y_test  

In [6]:
#RandomForestClassifier Model
def rf(x_train,y_train,x_test,y_test):
    classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
    classifier.fit(x_train,y_train)
    cm, cr, Accuracy, x_test, y_test=CM(classifier,x_test,y_test)
    return cm, cr, Accuracy, x_test, y_test

In [7]:
#GaussianNB Model
def gaus(x_train,y_train,x_test,y_test):
    classifier=GaussianNB()
    classifier.fit(x_train,y_train)
    cm, cr, Accuracy, x_test, y_test=CM(classifier,x_test,y_test)
    return cm, cr, Accuracy, x_test, y_test

In [8]:
#KNeighborsClassifier Model
def KN(x_train,y_train,x_test,y_test):
    classifier=KNeighborsClassifier()
    classifier.fit(x_train,y_train)
    cm, cr, Accuracy, x_test, y_test=CM(classifier,x_test,y_test)
    return cm, cr, Accuracy, x_test, y_test

In [9]:
#DecisionTreeClassifier Model
def DT(x_train,y_train,x_test,y_test):
    classifier=DecisionTreeClassifier(criterion = 'gini', max_features='sqrt',splitter='best',random_state = 0)
    classifier.fit(x_train,y_train)
    cm, cr, Accuracy, x_test, y_test=CM(classifier,x_test,y_test)
    return cm, cr, Accuracy, x_test, y_test

In [10]:
#SVC Model
def SVCM(x_train,y_train,x_test,y_test):
    classifier=SVC(kernel = 'linear', random_state = 0)
    classifier.fit(x_train,y_train)
    cm, cr, Accuracy, x_test, y_test=CM(classifier,x_test,y_test)
    return cm, cr, Accuracy, x_test, y_test

In [11]:
dataset= pd.read_csv("Heart_Disease_Prediction.csv")
ds=dataset

In [12]:
ds= pd.get_dummies(ds, drop_first=True)

In [13]:
Indep=ds.drop('Heart Disease_Presence', axis=1)
dep=ds['Heart Disease_Presence']


In [36]:
log_l=[]
rfm_l=[] 
guasm_l=[] 
kn_l=[] 
dtm_l=[] 
svcm_l=[]

In [37]:
#Table Creation
def RFE_regression(log_l,rfm_l, guasm_l, kn_l, dtm_l, svcm_l):
    dataframe = pd.DataFrame(index=['Logistic','RandomForest','DecisionTree', 'SVC'],columns=['Logistic','RandomForest', 'Guassian','KNeighbors','DecisionTree', 'SVC'])
    for num, idex in enumerate(dataframe.index):
        dataframe.loc[idex, 'Logistic']=log_l[num]
        dataframe.loc[idex, 'RandomForest']=rfm_l[num]
        dataframe.loc[idex, 'Guassian']=guasm_l[num]
        dataframe.loc[idex,'KNeighbors']=kn_l[num]
        dataframe.loc[idex,'DecisionTree']=dtm_l[num]
        dataframe.loc[idex,'SVC']=svcm_l[num]
    return dataframe

In [38]:
feature1=rfe_f(Indep,dep,3)

for j in feature1:
    x_train,x_test, y_train,y_test= train_test(j, dep)

    cm, cr, Accuracy, x_test, y_test=logistic(x_train,y_train,x_test,y_test)
    log_l.append(Accuracy)
        
    cm, cr, Accuracy, x_test, y_test= rf(x_train,y_train,x_test,y_test)
    rfm_l.append(Accuracy)
        
    cm, cr, Accuracy, x_test, y_test= gaus(x_train,y_train,x_test,y_test)
    guasm_l.append(Accuracy)
        
    cm, cr, Accuracy, x_test, y_test= KN(x_train,y_train,x_test,y_test)
    kn_l.append(Accuracy)
        
    cm, cr, Accuracy, x_test, y_test= DT(x_train,y_train,x_test,y_test)
    dtm_l.append(Accuracy)
        
    cm, cr, Accuracy, x_test, y_test= SVCM(x_train,y_train,x_test,y_test)
    svcm_l.append(Accuracy)
    
result= RFE_regression(log_l,rfm_l, guasm_l, kn_l, dtm_l, svcm_l)

LogisticRegression(max_iter=200)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)
DecisionTreeClassifier(max_features='sqrt', random_state=0)
SVC(kernel='linear', random_state=0)


In [27]:
#4
result

Unnamed: 0,Logistic,RandomForest,Guassian,KNeighbors,DecisionTree,SVC
Logistic,0.728395,0.753086,0.753086,0.740741,0.728395,0.728395
RandomForest,0.777778,0.716049,0.777778,0.777778,0.716049,0.802469
DecisionTree,0.777778,0.716049,0.777778,0.777778,0.716049,0.802469
SVC,0.728395,0.753086,0.753086,0.740741,0.728395,0.728395


In [23]:
#5
result

Unnamed: 0,Logistic,RandomForest,Guassian,KNeighbors,DecisionTree,SVC
Logistic,0.753086,0.740741,0.777778,0.777778,0.740741,0.777778
RandomForest,0.790123,0.716049,0.790123,0.740741,0.728395,0.777778
DecisionTree,0.802469,0.790123,0.777778,0.82716,0.716049,0.802469
SVC,0.753086,0.740741,0.777778,0.777778,0.740741,0.777778


In [31]:
#7
result

Unnamed: 0,Logistic,RandomForest,Guassian,KNeighbors,DecisionTree,SVC
Logistic,0.790123,0.740741,0.740741,0.740741,0.691358,0.753086
RandomForest,0.82716,0.802469,0.814815,0.814815,0.753086,0.82716
DecisionTree,0.802469,0.716049,0.790123,0.740741,0.728395,0.802469
SVC,0.802469,0.802469,0.790123,0.790123,0.839506,0.814815


In [35]:
#8
result

Unnamed: 0,Logistic,RandomForest,Guassian,KNeighbors,DecisionTree,SVC
Logistic,0.802469,0.82716,0.790123,0.765432,0.765432,0.839506
RandomForest,0.82716,0.802469,0.82716,0.777778,0.716049,0.82716
DecisionTree,0.802469,0.740741,0.765432,0.814815,0.728395,0.790123
SVC,0.802469,0.82716,0.790123,0.765432,0.765432,0.839506


In [39]:
#3
result

Unnamed: 0,Logistic,RandomForest,Guassian,KNeighbors,DecisionTree,SVC
Logistic,0.753086,0.703704,0.666667,0.753086,0.703704,0.753086
RandomForest,0.802469,0.765432,0.802469,0.802469,0.716049,0.802469
DecisionTree,0.777778,0.716049,0.790123,0.765432,0.728395,0.814815
SVC,0.765432,0.679012,0.740741,0.641975,0.679012,0.765432
