In [1]:
#Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import precision_score, recall_score,f1_score,accuracy_score
# NOTE(review): plot_roc_curve was removed in scikit-learn 1.2 (replaced by
# RocCurveDisplay.from_estimator) and is not used in the visible cells; this import
# fails on newer releases - confirm the pinned scikit-learn version.
from sklearn.metrics import plot_roc_curve,confusion_matrix

In [2]:
# Reading data
# The path is relative, so it is resolved against the kernel's working directory.
DATA_PATH = "heart.csv"
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError as err:
    # Same exception type as before, but with a message that says how to fix it.
    raise FileNotFoundError(
        f"Could not find {DATA_PATH!r} in the current working directory; "
        "place the dataset next to this notebook or update DATA_PATH."
    ) from err

FileNotFoundError: [Errno 2] No such file or directory: 'heart.csv'

In [None]:
# One-hot encode the three categorical columns; each set of indicator columns
# is prefixed with the name of the column it came from.
a, b, c = (
    pd.get_dummies(df[column], prefix=column)
    for column in ("cp", "thal", "slope")
)

In [None]:
# Append the indicator columns to the right of the existing frame
frames = [df, a, b, c]
df = pd.concat(frames, axis="columns")
df.head()

In [None]:
# The indicator columns now carry this information, so remove the source columns
df = df.drop(["cp", "thal", "slope"], axis=1)
df.head()

# Model Training

In [None]:
# Separate the predictors (every column except "target") from the label column
x = df.drop(columns="target")
y = df["target"]

In [None]:
# Hold out 20% of the rows for testing.
# random_state pins the shuffle on the call itself, so the split no longer depends
# on the state of NumPy's global random generator at the moment the cell runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [None]:
# Three independent dictionaries keyed by model name, one per metric
# (accuracy, recall, F1); the model sections below fill them in.
accuracy_values, recall_values, f1_values = {}, {}, {}

## Logistic Regression

In [None]:
# Fit a logistic regression classifier with scikit-learn's default settings
logistic_model = LogisticRegression().fit(x_train, y_train)
logistic_model

In [None]:
# Predict class labels for the test rows with the fitted logistic regression model
y_pred_logistic = logistic_model.predict(x_test)

In [None]:
# Report accuracy, recall and F1 on the test rows, three decimals each
for template, scorer in (
    ("Accuracy score : %0.3f", accuracy_score),
    ("Recall score   : %0.3f", recall_score),
    ("F1 score       : %0.3f", f1_score),
):
    print(template % scorer(y_test, y_pred_logistic))

In [None]:
# Record each metric as a percentage under this model's key for the comparison charts
for scores, scorer in (
    (accuracy_values, accuracy_score),
    (recall_values, recall_score),
    (f1_values, f1_score),
):
    scores["Logistic_Regression"] = scorer(y_test, y_pred_logistic) * 100

In [None]:
# Confusion matrix heatmap for Logistic Regression.
# scikit-learn puts the true classes on the rows and the predicted classes on the
# columns; labelling the axes makes the false-positive / false-negative cells readable.
# fmt="d" prints the counts as plain integers instead of the default ".2g" format.
fig, ax = plt.subplots()
sns.heatmap(confusion_matrix(y_test, y_pred_logistic), annot=True, fmt="d", ax=ax)
ax.set(title="Logistic Regression", xlabel="Predicted label", ylabel="True label")
plt.show()

In [None]:
# Logistic Regression
# True Positives: 27 
# True Negatives: 27
# Type 1 Error=> False Positive: 5 
# Type 2 Error=> False Negative: 2

## Random Forest Classifier

In [None]:
# Fit a random forest with default hyper-parameters.
# The seed is passed to the estimator instead of reseeding NumPy's global generator,
# so the fitted forest is reproducible regardless of what ran before this cell.
random_forest_model = RandomForestClassifier(random_state=42)
random_forest_model.fit(x_train, y_train)

In [None]:
# Predict class labels for the test rows with the fitted random forest
y_pred_rfc = random_forest_model.predict(x_test)

In [None]:
# Report accuracy, recall and F1 on the test rows, three decimals each
for template, scorer in (
    ("Accuracy score : %0.3f", accuracy_score),
    ("Recall score   : %0.3f", recall_score),
    ("F1 score       : %0.3f", f1_score),
):
    print(template % scorer(y_test, y_pred_rfc))

In [None]:
# Record each metric as a percentage under this model's key for the comparison charts
for scores, scorer in (
    (accuracy_values, accuracy_score),
    (recall_values, recall_score),
    (f1_values, f1_score),
):
    scores["Random Forest"] = scorer(y_test, y_pred_rfc) * 100

In [None]:
# Confusion matrix heatmap for the Random Forest Classifier.
# scikit-learn puts the true classes on the rows and the predicted classes on the
# columns; labelling the axes makes the false-positive / false-negative cells readable.
# fmt="d" prints the counts as plain integers instead of the default ".2g" format.
fig, ax = plt.subplots()
sns.heatmap(confusion_matrix(y_test, y_pred_rfc), annot=True, fmt="d", ax=ax)
ax.set(title="Random Forest Classifier", xlabel="Predicted label", ylabel="True label")
plt.show()

In [None]:
# Random Forest Classifier
# True Positives: 25 
# True Negatives: 28
# Type 1 Error=> False Positive: 4 
# Type 2 Error=> False Negative: 4

## KNN Classifier

In [None]:
# Fit a k-nearest-neighbours classifier with scikit-learn's default settings
np.random.seed(42)
knn_model = KNeighborsClassifier().fit(x_train, y_train)
knn_model

In [None]:
# Predict class labels for the test rows with the fitted KNN model
y_pred_knn = knn_model.predict(x_test)

In [None]:
# Report accuracy, recall and F1 on the test rows, three decimals each
for template, scorer in (
    ("Accuracy score : %0.3f", accuracy_score),
    ("Recall score   : %0.3f", recall_score),
    ("F1 score       : %0.3f", f1_score),
):
    print(template % scorer(y_test, y_pred_knn))

In [None]:
# Record each metric as a percentage under this model's key for the comparison charts
for scores, scorer in (
    (accuracy_values, accuracy_score),
    (recall_values, recall_score),
    (f1_values, f1_score),
):
    scores["KNN"] = scorer(y_test, y_pred_knn) * 100

In [None]:
# Confusion matrix heatmap for the K Neighbours Classifier.
# scikit-learn puts the true classes on the rows and the predicted classes on the
# columns; labelling the axes makes the false-positive / false-negative cells readable.
# fmt="d" prints the counts as plain integers instead of the default ".2g" format.
fig, ax = plt.subplots()
sns.heatmap(confusion_matrix(y_test, y_pred_knn), annot=True, fmt="d", ax=ax)
ax.set(title="K Neighbour Classifier", xlabel="Predicted label", ylabel="True label")
plt.show()

In [None]:
# K Neighbours Classifier
# True Positives: 18
# True Negatives: 24
# Type 1 Error=> False Positive: 8 
# Type 2 Error=> False Negative: 11

## Support Vector Machine Classifier (SVM)

In [None]:
# Fit a support vector classifier that uses a linear kernel
np.random.seed(42)
svm_model = svm.SVC(kernel="linear").fit(x_train, y_train)
svm_model

In [None]:
# Predict class labels for the test rows with the fitted linear SVM
y_pred_svm = svm_model.predict(x_test)

In [None]:
# Report accuracy, recall and F1 on the test rows, three decimals each
for template, scorer in (
    ("Accuracy score : %0.3f", accuracy_score),
    ("Recall score   : %0.3f", recall_score),
    ("F1 score       : %0.3f", f1_score),
):
    print(template % scorer(y_test, y_pred_svm))

In [None]:
# Record each metric as a percentage under this model's key for the comparison charts
for scores, scorer in (
    (accuracy_values, accuracy_score),
    (recall_values, recall_score),
    (f1_values, f1_score),
):
    scores["SVM"] = scorer(y_test, y_pred_svm) * 100

In [None]:
# Confusion matrix heatmap for the SVM Classifier.
# scikit-learn puts the true classes on the rows and the predicted classes on the
# columns; labelling the axes makes the false-positive / false-negative cells readable.
# fmt="d" prints the counts as plain integers instead of the default ".2g" format.
fig, ax = plt.subplots()
sns.heatmap(confusion_matrix(y_test, y_pred_svm), annot=True, fmt="d", ax=ax)
ax.set(title="Support Vector Machine Classifier", xlabel="Predicted label", ylabel="True label")
plt.show()

In [None]:
# SVM Classifier
# True Positives: 26
# True Negatives: 26
# Type 1 Error=> False Positive: 6
# Type 2 Error=> False Negative: 3

## SVM with PCA

In [None]:
# Build the PCA feature set used by the two "with PCA" models below.
# (PCA and StandardScaler are imported in the notebook's top import cell.)
#
# Re-split from the untouched x / y so this cell can be re-run safely. The split
# uses the same settings as the earlier split (20% test, seed 42) so that the PCA
# models are scored on a test set of the same size as the other models that share
# the comparison charts; previously they were scored on a 30% split.
# NOTE: this cell rebinds x_train / x_test to PCA-projected arrays. Re-run the
# original split cell before re-running any of the non-PCA model cells.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Standardise the raw features; the scaler is fitted on the training rows only
# and then applied unchanged to the test rows.
feature_scaler = StandardScaler()
x_train = feature_scaler.fit_transform(x_train)
x_test = feature_scaler.transform(x_test)

# Project onto the first three principal components (again fitted on train only).
pca = PCA(n_components=3)
x_train = pca.fit_transform(x_train)
x_test = pca.transform(x_test)

# Standardise the three components themselves before they are fed to the models.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# Scatter the training rows on their first two columns (the first two principal
# components after the preceding cell), coloured by the target label.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap='plasma')
ax.set_xlabel('First principal component')
ax.set_ylabel('Second Principal Component')
# Ending on plt.show() keeps the Text repr of the last call out of the cell output.
plt.show()

In [None]:
# Fit a support vector classifier (default settings) on the PCA-projected training rows
np.random.seed(42)
svmpca_model = svm.SVC().fit(x_train, y_train)
svmpca_model

In [None]:
# Predict class labels for the PCA-projected test rows with the fitted SVM
y_pred_svmpca = svmpca_model.predict(x_test)

In [None]:
# Report accuracy, recall and F1 on the test rows, three decimals each
for template, scorer in (
    ("Accuracy score : %0.3f", accuracy_score),
    ("Recall score   : %0.3f", recall_score),
    ("F1 score       : %0.3f", f1_score),
):
    print(template % scorer(y_test, y_pred_svmpca))

In [None]:
# Record each metric as a percentage under this model's key for the comparison charts
for scores, scorer in (
    (accuracy_values, accuracy_score),
    (recall_values, recall_score),
    (f1_values, f1_score),
):
    scores["SVM with PCA"] = scorer(y_test, y_pred_svmpca) * 100

In [None]:
# Confusion matrix heatmap for the SVM Classifier after PCA.
# scikit-learn puts the true classes on the rows and the predicted classes on the
# columns; labelling the axes makes the false-positive / false-negative cells readable.
# fmt="d" prints the counts as plain integers instead of the default ".2g" format.
fig, ax = plt.subplots()
sns.heatmap(confusion_matrix(y_test, y_pred_svmpca), annot=True, fmt="d", ax=ax)
ax.set(title="SVM with PCA", xlabel="Predicted label", ylabel="True label")
plt.show()

## KNN with PCA

In [None]:
# Fit a k-nearest-neighbours classifier (default settings) on the PCA-projected training rows
np.random.seed(42)
knnpca_model = KNeighborsClassifier().fit(x_train, y_train)
knnpca_model

In [None]:
# Predict class labels for the PCA-projected test rows with the fitted KNN model
y_pred_knnpca = knnpca_model.predict(x_test)

In [None]:
# Report accuracy, recall and F1 on the test rows, three decimals each
for template, scorer in (
    ("Accuracy score : %0.3f", accuracy_score),
    ("Recall score   : %0.3f", recall_score),
    ("F1 score       : %0.3f", f1_score),
):
    print(template % scorer(y_test, y_pred_knnpca))

In [None]:
# Record each metric as a percentage under this model's key for the comparison charts
for scores, scorer in (
    (accuracy_values, accuracy_score),
    (recall_values, recall_score),
    (f1_values, f1_score),
):
    scores["KNN with PCA"] = scorer(y_test, y_pred_knnpca) * 100

In [None]:
# Confusion matrix heatmap for the K Neighbours Classifier after PCA.
# scikit-learn puts the true classes on the rows and the predicted classes on the
# columns; labelling the axes makes the false-positive / false-negative cells readable.
# fmt="d" prints the counts as plain integers instead of the default ".2g" format.
fig, ax = plt.subplots()
sns.heatmap(confusion_matrix(y_test, y_pred_knnpca), annot=True, fmt="d", ax=ax)
ax.set(title="K Neighbour Classifier with PCA", xlabel="Predicted label", ylabel="True label")
plt.show()



## Comparing models

In [None]:
# Horizontal bar chart of the accuracy percentage stored for each model
fig, ax = plt.subplots(figsize=(10, 4))
ax.set_title("Accuracy Scores")

model_names = list(accuracy_values)
model_scores = list(accuracy_values.values())
bar_colors = ["#0088AA", "#007766", "#EE5500", "#EE7700", "#EEEE00", "#333"]
ax.barh(model_names, model_scores, color=bar_colors)

# Hide the frame around the plot area
for side in ("top", "bottom", "left", "right"):
    ax.spines[side].set_visible(False)

# Write each bar's value (two decimals) just past its right-hand end
for bar in ax.patches:
    ax.text(
        bar.get_width() + 0.1,
        bar.get_y() + 0.3,
        str(round(bar.get_width(), 2)),
        fontsize=12,
        color="#0f0f0f",
    )

In [None]:
# Horizontal bar chart of the recall percentage stored for each model
fig, ax = plt.subplots(figsize=(10, 4))
ax.set_title("Recall Scores")

model_names = list(recall_values)
model_scores = list(recall_values.values())
bar_colors = ["#0088AA", "#007766", "#EE5500", "#EE7700", "#EEEE00", "#333"]
ax.barh(model_names, model_scores, color=bar_colors)

# Hide the frame around the plot area
for side in ("top", "bottom", "left", "right"):
    ax.spines[side].set_visible(False)

# Write each bar's value (two decimals) just past its right-hand end
for bar in ax.patches:
    ax.text(
        bar.get_width() + 0.1,
        bar.get_y() + 0.3,
        str(round(bar.get_width(), 2)),
        fontsize=12,
        color="#0f0f0f",
    )
 

In [None]:
# Horizontal bar chart of the F1 percentage stored for each model
fig, ax = plt.subplots(figsize=(10, 4))
ax.set_title("F1 Scores")

model_names = list(f1_values)
model_scores = list(f1_values.values())
bar_colors = ["#0088AA", "#007766", "#EE5500", "#EE7700", "#EEEE00", "#333"]
ax.barh(model_names, model_scores, color=bar_colors)

# Hide the frame around the plot area
for side in ("top", "bottom", "left", "right"):
    ax.spines[side].set_visible(False)

# Write each bar's value (two decimals) just past its right-hand end
for bar in ax.patches:
    ax.text(
        bar.get_width() + 0.1,
        bar.get_y() + 0.3,
        str(round(bar.get_width(), 2)),
        fontsize=12,
        color="#0f0f0f",
    )