# IMPORTS

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier


: 

# LOADING THE DATA

In [None]:
# Read heart1.csv and discard the three columns this analysis does not use,
# then preview the first rows of the resulting frame.
df = pd.read_csv("heart1.csv").drop(columns=['oldpeak', 'slp', 'thall'])
df.head()

: 

# STANDARDIZING THE DATA


In [None]:
# Standardize the feature columns only (zero mean, unit variance per column).
# The last column is the class label: it is passed through untouched so it
# keeps its original label values instead of being turned into scaled floats.
column_names = ['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
                'exng', 'caa', 'output']
feature_names = column_names[:-1]
target_name = column_names[-1]

# NOTE(review): the scaler is still fit on all rows before the train/test
# split, so test-set statistics leak into the scaling. Fitting on the training
# rows only would require moving this step after the split.
scale = StandardScaler()
scaled_features = scale.fit_transform(df.iloc[:, :-1])

# Pull the label out as a plain array first so that rebuilding `df` below
# cannot misalign rows through index matching.
target_values = df.iloc[:, -1].to_numpy()

df = pd.DataFrame(scaled_features, columns=feature_names)
df[target_name] = target_values

# SPLITTING DATA INTO TRAIN DATA AND TEST DATA

In [None]:
# Features are every column except the last; the label is the last column.
x = df.iloc[:, :-1]
# Take the label as a 1-D Series rather than a one-column DataFrame, so that
# LabelEncoder and the estimators receive the 1-D target they expect.
y = df.iloc[:, -1]

# Preview only the first rows instead of dumping the full frames.
print(x.head())
print(y.head())

# 70/30 split; the fixed random_state keeps the partition reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=101)

# Learn the label -> integer mapping from the training labels only, then apply
# that same mapping to the test labels so both sides use identical codes.
lbl = LabelEncoder()
encoded_y = lbl.fit_transform(y_train)
encoded_ytest = lbl.transform(y_test)

# LOGISTIC REGRESSION

In [None]:
# Fit a logistic-regression classifier on the training split.
logreg = LogisticRegression()
logreg.fit(x_train, encoded_y)

# Encode the test labels with the mapping already learned from the training
# labels (transform, not fit_transform) so train and test codes stay aligned.
# np.ravel makes this work whether y_test is a Series or a one-column frame.
encoded_ytest = lbl.transform(np.ravel(y_test))

# Evaluate on the held-out split.
Y_pred1 = logreg.predict(x_test)
lr_conf_matrix = confusion_matrix(encoded_ytest, Y_pred1)
lr_acc_score = accuracy_score(encoded_ytest, Y_pred1)
print("Logistic regression accuracy =",lr_acc_score*100,"%") # Printing the accuracy
print(lr_conf_matrix) # Printing the confusion matrix


# DECISION TREE CLASSIFICATION ALGO

In [None]:
# Fit a decision tree on the training split. The fixed random_state makes the
# tree (and therefore the reported accuracy) reproducible between runs.
tree = DecisionTreeClassifier(random_state=101)
tree.fit(x_train, encoded_y)
ypred2 = tree.predict(x_test)

# Encode the test labels with the training-time mapping (transform, not
# fit_transform) so the encoder is never re-fit on test data.
# np.ravel makes this work whether y_test is a Series or a one-column frame.
encoded_ytest = lbl.transform(np.ravel(y_test))

tree_conf_matrix = confusion_matrix(encoded_ytest, ypred2)
tree_acc_score = accuracy_score(encoded_ytest, ypred2)
print("Decision tree accuracy =",tree_acc_score*100,"%") # Printing the accuracy
print(tree_conf_matrix) # Printing the confusion matrix

# RANDOM FOREST ALGO

In [None]:
# Fit a random forest on the training split. The fixed random_state makes the
# bootstrap sampling and feature selection repeatable, so the accuracy reported
# here does not change from run to run.
rf = RandomForestClassifier(random_state=101)
rf.fit(x_train, encoded_y)
ypred3 = rf.predict(x_test)

# Evaluate once against the encoded test labels produced by an earlier cell.
rf_conf_matrix = confusion_matrix(encoded_ytest, ypred3)
rf_acc_score = accuracy_score(encoded_ytest, ypred3)
print("Random forest accuracy =",rf_acc_score*100,"%") # Printing the accuracy
print(rf_conf_matrix) # Printing the confusion matrix

# K-NEAREST NEIGHBOUR ALGO

In [None]:
# Sweep k = 1..39 and record the test-set misclassification rate for each k.
# NOTE(review): error_rate is collected but not used in this cell, and it is
# measured on the test split, so choosing k from it would leak test information.
error_rate = []
for k in range(1, 40):
    candidate = KNeighborsClassifier(n_neighbors=k).fit(x_train, encoded_y)
    error_rate.append(np.mean(candidate.predict(x_test) != encoded_ytest))

# Final model with a fixed neighbourhood size.
# NOTE(review): 12 is hard-coded — presumably read off the sweep above; confirm.
knn = KNeighborsClassifier(n_neighbors=12)
knn.fit(x_train, encoded_y)
ypred4 = knn.predict(x_test)

knn_conf_matrix = confusion_matrix(encoded_ytest, ypred4)
knn_acc_score = accuracy_score(encoded_ytest, ypred4)
print("K-nearest neighbour accuracy =",knn_acc_score*100,"%") # Printing the accuracy
print(knn_conf_matrix) # Printing the confusion matrix

# SUPPORT VECTOR MACHINE ALGO

In [None]:
# Fit a support-vector classifier on the training split.
# The estimator gets its own name so the imported `svm` module is not rebound
# to an estimator instance; this keeps the cell safe to re-run.
svm_clf = SVC()
svm_clf.fit(x_train, encoded_y)
ypred5 = svm_clf.predict(x_test)

# Evaluate against the encoded test labels produced by an earlier cell.
svm_conf_matrix = confusion_matrix(encoded_ytest, ypred5)
svm_acc_score = accuracy_score(encoded_ytest, ypred5)
print("SVM accuracy =",svm_acc_score*100,"%") # Printing the accuracy
print(svm_conf_matrix) # Printing the confusion matrix

# ACCURACY COMPARISON TABLE

In [None]:
# Collect every model's test accuracy (in percent) into one table, best first.
model_acc = pd.DataFrame({
    'Model': ['Logistic Regression', 'Decision Tree', 'Random Forest', 'K Nearest Neighbor', 'SVM'],
    'Accuracy': [lr_acc_score*100, tree_acc_score*100, rf_acc_score*100, knn_acc_score*100, svm_acc_score*100],
})
model_acc = model_acc.sort_values(by=['Accuracy'], ascending=False)
print(model_acc)

# Derive the winner from the table instead of hard-coding a model name, so the
# message always agrees with the numbers above. Ties are all reported.
top_accuracy = model_acc['Accuracy'].max()
best_models = model_acc.loc[model_acc['Accuracy'] == top_accuracy, 'Model']
print("Most accurate model is", " / ".join(best_models))