In [1]:
#importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [2]:
#importing dataset
dataset=pd.read_csv('D:/datasets/heart.csv')
X = dataset.iloc[:,0:13].values
y = dataset.iloc[:,-1].values

In [3]:
#Splitting dataset into training set and testing set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2,random_state=0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [4]:
#using various classifiers
# Logistic Regression
from sklearn.linear_model import LogisticRegression
lr=LogisticRegression()
lr.fit(X_train,y_train)
y_pred = lr.predict(X_test)
cm1=confusion_matrix(y_test,y_pred)
lr_acc=accuracy_score(y_test,y_pred)
lr_prec=cm1[1][1]/(cm1[1][1]+cm1[0][1])
lr_recall=cm1[1][1]/(cm1[1][1]+cm1[1][0])
lr_f1=2*lr_prec*lr_recall/(lr_prec+lr_recall)



In [5]:
# using KNN classifier
from sklearn.neighbors import KNeighborsClassifier
# KNN Classifier using k=10
knn=KNeighborsClassifier(n_neighbors = 10, metric = 'minkowski', p=2)
knn.fit(X_train,y_train)
y_pred = knn.predict(X_test)
knn_acc=accuracy_score(y_test,y_pred)
cm2=confusion_matrix(y_test,y_pred)
knn_prec=cm2[1][1]/(cm2[1][1]+cm2[0][1])
knn_recall=cm2[1][1]/(cm2[1][1]+cm2[1][0])
knn_f1=2*knn_prec*knn_recall/(knn_prec+knn_recall)

In [6]:
#SVC Gaussian
from sklearn.svm import SVC
svc1=SVC(kernel='rbf')
svc1.fit(X_train,y_train)
y_pred=svc1.predict(X_test)
svc_acc=accuracy_score(y_test,y_pred)
cm3=confusion_matrix(y_test,y_pred)
svc_prec=cm3[1][1]/(cm3[1][1]+cm3[0][1])
svc_recall=cm3[1][1]/(cm3[1][1]+cm3[1][0])
svc_f1=2*svc_prec*svc_recall/(svc_prec+svc_recall)

In [7]:
#SVC Linear
from sklearn.svm import SVC
svc=SVC(kernel='linear')
svc.fit(X_train,y_train)
y_pred=svc.predict(X_test)
svc_lin_acc=accuracy_score(y_test,y_pred)
cm5=confusion_matrix(y_test,y_pred)
svc_lin_prec=cm5[1][1]/(cm5[1][1]+cm5[0][1])
svc_lin_recall=cm5[1][1]/(cm5[1][1]+cm5[1][0])
svc_lin_f1=2*svc_lin_prec*svc_lin_recall/(svc_lin_prec+svc_lin_recall)

In [8]:
#Decision Tree
from sklearn.tree import DecisionTreeClassifier
decision_tree = DecisionTreeClassifier(criterion = 'entropy', max_depth=7, random_state=0)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)
dec_acc=accuracy_score(y_test, y_pred)
cm6=confusion_matrix(y_test,y_pred)
dec_prec=cm6[1][1]/(cm6[1][1]+cm6[0][1])
dec_recall=cm6[1][1]/(cm6[1][1]+cm6[1][0])
dec_f1=2*dec_prec*dec_recall/(dec_prec+dec_recall)

In [9]:
#Random Decision trees
from sklearn.ensemble import RandomForestClassifier
rfc=RandomForestClassifier(n_estimators=100, criterion = 'entropy', random_state=0)
rfc.fit(X_train,y_train)
y_pred=rfc.predict(X_test)
rfc_acc=accuracy_score(y_test,y_pred)
cm7=confusion_matrix(y_test,y_pred)
rfc_prec=cm7[1][1]/(cm7[1][1]+cm7[0][1])
rfc_recall=cm7[1][1]/(cm7[1][1]+cm7[1][0])
rfc_f1=2*rfc_prec*rfc_recall/(rfc_prec+rfc_recall)

In [10]:
# Dimensionality Reduction
# Applying LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda=LDA(n_components = 2)
X_train = lda.fit_transform(X_train,y_train)
X_test = lda.fit_transform(X_test,y_test)



In [11]:
#Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB
gnb=GaussianNB()
gnb.fit(X_train,y_train)
y_pred=gnb.predict(X_test)
gnb_acc=accuracy_score(y_test,y_pred)
cm4=confusion_matrix(y_test,y_pred)
gnb_prec=cm4[1][1]/(cm4[1][1]+cm4[0][1])
gnb_recall=cm4[1][1]/(cm4[1][1]+cm4[1][0])
gnb_f1=2*gnb_prec*gnb_recall/(gnb_prec+gnb_recall)

In [26]:
# creating the prec,recall,f1 table
models=pd.DataFrame({
    'Model' : ['KNN','Logistic Regression','LDA + Naive Bayes','Random Forest','Decision Tree','Linear SVC','RBF SVC'],
    'Accuracy Score' : [knn_acc,lr_acc,gnb_acc,rfc_acc,dec_acc,svc_lin_acc,svc_acc],
    'Precision' : [knn_prec,lr_prec,gnb_prec,rfc_prec,dec_prec,svc_lin_prec,svc_prec],
    'Recall' : [knn_recall,lr_recall,gnb_recall,rfc_recall,dec_recall,svc_lin_recall,svc_recall],
    'F1 Score' : [knn_f1,lr_f1,gnb_f1,rfc_f1,dec_f1,svc_lin_f1,svc_f1],
    'TP' : [cm2[0][0],cm1[0][0],cm4[0][0],cm7[0][0],cm6[0][0],cm5[0][0],cm3[0][0]],
    'TN' : [cm2[1][1],cm1[1][1],cm4[1][1],cm7[1][1],cm6[1][1],cm5[1][1],cm3[1][1]]
})
models.sort_values(by='Accuracy Score',ascending=False)

Unnamed: 0,Model,Accuracy Score,Precision,Recall,F1 Score,TP,TN
2,LDA + Naive Bayes,0.918033,0.939394,0.911765,0.925373,25,31
0,KNN,0.885246,0.909091,0.882353,0.895522,24,30
6,RBF SVC,0.868852,0.842105,0.941176,0.888889,21,32
1,Logistic Regression,0.836066,0.833333,0.882353,0.857143,21,30
3,Random Forest,0.819672,0.848485,0.823529,0.835821,22,28
5,Linear SVC,0.819672,0.810811,0.882353,0.84507,20,30
4,Decision Tree,0.786885,0.862069,0.735294,0.793651,23,25


In [28]:
#models.to_csv(r"C:\Users\Syed\Desktop\Project_files\dataset_folder\heart_data_out.csv",index=False,header=True)