In [None]:
# Numerical, tabular and plotting libraries used by the cells below.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler


# List the working directory; "heart.csv" is later read via a relative path.
import os
print(os.listdir())

# NOTE(review): this silences every warning for the rest of the session, which
# would also hide any deprecation or convergence warnings raised by the
# libraries above -- confirm that is intended.
import warnings
warnings.filterwarnings('ignore')

In [None]:
dataset = pd.read_csv("heart.csv")
print(dataset)

In [None]:
type(dataset)

In [None]:
dataset.shape
dataset.head(5)


In [None]:
dataset.sample(5)

In [None]:
dataset.info()

In [None]:
dataset["target"].describe()
dataset["target"].unique()

In [None]:
# Rank every column by the absolute value of its correlation with the target.
print(
    dataset.corr()
    .loc[:, "target"]
    .abs()
    .sort_values(ascending=False)
)

In [None]:
#Exploratory Data Analysis (EDA)
#First, analysing the target variable
y = dataset["target"]

# Pass the series by keyword: seaborn >= 0.12 treats the first positional
# argument as `data`, not as the variable to count.
sns.countplot(x=y)


# Number of rows in each target class.
target_temp = dataset.target.value_counts()

print(target_temp)

In [None]:
# Share of each class, derived from the counts themselves rather than a
# hard-coded row total, so the figures stay correct if the data changes.
total_patients = target_temp.sum()
print("Percentage of patients without heart problems: "+str(round(target_temp[0]*100/total_patients,2)))
print("Percentage of patients with heart problems: "+str(round(target_temp[1]*100/total_patients,2)))

In [None]:
#Analysing the 'Sex' feature
dataset["sex"].unique()
sns.barplot(dataset["sex"],y)

In [None]:
#Analysing the 'Chest Pain Type' feature
dataset["cp"].unique()
sns.barplot(dataset["cp"],y)

In [None]:
#Analysing the FBS feature
dataset["fbs"].describe()
dataset["fbs"].unique()

sns.barplot(dataset["fbs"],y)

In [None]:
#Analysing the restecg feature
dataset["restecg"].unique()
sns.barplot(dataset["restecg"],y)

In [None]:
#Analysing the 'exang' feature
dataset["exang"].unique()

sns.barplot(dataset["exang"],y)

In [None]:
#Analysing the Slope feature
dataset["slope"].unique()

sns.barplot(dataset["slope"],y)

In [None]:
#Analysing the 'ca' feature
#number of major vessels (0-3) colored by flourosopy
dataset["ca"].unique()

sns.countplot(dataset["ca"])

In [None]:
sns.barplot(dataset["ca"],y)

In [None]:
# Analysing the 'thal' feature
dataset["thal"].unique()

sns.barplot(dataset["thal"],y)


In [None]:
# Distribution of 'thal'. `distplot` is deprecated in seaborn; a density-scaled
# histogram with a KDE overlay is the documented replacement.
sns.histplot(x=dataset["thal"], kde=True, stat="density")

In [None]:
#Train Test split
from sklearn.model_selection import train_test_split

# Separate the label column from the feature columns.
target = dataset["target"]
predictors = dataset.drop(columns="target")

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    predictors,
    target,
    test_size=0.20,
    random_state=0,
)


In [None]:
#Data Standardization
scaler=StandardScaler()         
scaler.fit(X_train)
x_train=scaler.transform(X_train)
x_test=scaler.transform(X_test)
print(X_train)

In [None]:
X_test.shape

In [None]:
Y_train.shape

In [None]:
Y_test.shape

In [None]:
#Model Fitting
from sklearn.metrics import accuracy_score

In [None]:
#Logistic Regression
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()

lr.fit(X_train,Y_train)

Y_pred_lr = lr.predict(X_test)
Y_pred_lr.shape

In [None]:
score_lr = round(accuracy_score(Y_pred_lr,Y_test)*100,2)

print("The accuracy score achieved using Logistic Regression is: "+str(score_lr)+" %")

In [None]:
#Naive Bayes
from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()

nb.fit(X_train,Y_train)

Y_pred_nb = nb.predict(X_test)
Y_pred_nb.shape

In [None]:
score_nb = round(accuracy_score(Y_pred_nb,Y_test)*100,2)

print("The accuracy score achieved using Naive Bayes is: "+str(score_nb)+" %")

In [None]:
#SVM
from sklearn import svm

sv = svm.SVC(kernel='linear')

sv.fit(X_train, Y_train)

Y_pred_svm = sv.predict(X_test)
Y_pred_svm.shape

In [None]:
score_svm = round(accuracy_score(Y_pred_svm,Y_test)*100,2)

print("The accuracy score achieved using Linear SVM is: "+str(score_svm)+" %")

In [None]:
#K Nearest Neighbors
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(X_train,Y_train)
Y_pred_knn=knn.predict(X_test)
Y_pred_knn.shape

In [None]:
score_knn = round(accuracy_score(Y_pred_knn,Y_test)*100,2)

print("The accuracy score achieved using KNN is: "+str(score_knn)+" %")

In [None]:
#Decision Tree
from sklearn.tree import DecisionTreeClassifier

max_accuracy = 0


for x in range(200):
    dt = DecisionTreeClassifier(random_state=x)
    dt.fit(X_train,Y_train)
    Y_pred_dt = dt.predict(X_test)
    current_accuracy = round(accuracy_score(Y_pred_dt,Y_test)*100,2)
    if(current_accuracy>max_accuracy):
        max_accuracy = current_accuracy
        best_x = x
        
#print(max_accuracy)
#print(best_x)


dt = DecisionTreeClassifier(random_state=best_x)
dt.fit(X_train,Y_train)
Y_pred_dt = dt.predict(X_test)
print(Y_pred_dt.shape)

In [None]:
score_dt = round(accuracy_score(Y_pred_dt,Y_test)*100,2)

print("The accuracy score achieved using Decision Tree is: "+str(score_dt)+" %")

In [None]:
#Random Forest
from sklearn.ensemble import RandomForestClassifier

# Choose the seed by out-of-bag accuracy, which is estimated from training rows
# each tree did not see. Selecting on X_test accuracy would tune the model on
# the test set and make the reported test accuracy optimistic. The number of
# fits is unchanged.
max_accuracy = 0
best_x = 0  # fallback so the refit below never references an undefined name


for x in range(2000):
    rf = RandomForestClassifier(random_state=x, oob_score=True)
    rf.fit(X_train,Y_train)
    current_accuracy = round(rf.oob_score_*100,2)
    if(current_accuracy>max_accuracy):
        max_accuracy = current_accuracy
        best_x = x

#print(max_accuracy)
#print(best_x)

# Refit with the selected seed and predict the untouched test split once.
rf = RandomForestClassifier(random_state=best_x)
rf.fit(X_train,Y_train)
Y_pred_rf = rf.predict(X_test)
Y_pred_rf.shape

In [None]:
# Read one patient's feature values from the keyboard.
age=int(input("Enter the age of the patient:"))
sex=int(input("Enter the sex:Male-1,Female-0:"))
cp=int(input("Enter the value of cp:"))
trestbps=int(input("Enter the value of trestbps:"))
chol=int(input("Enter the value of chol:"))
fbs=int(input("Enter the value of fbs:"))
restecg=int(input("Enter the value of restecg:"))
thalach=int(input("Enter the value of thalach:"))
exang=int(input("Enter the value of exang:"))
oldpeak=float(input("Enter the value of oldpeak:"))
slope=int(input("Enter the value of slope:"))
ca=int(input("Enter the value of ca:"))
thal=int(input("Enter the value of thal:"))

# Wrap the values in a one-row frame carrying the training column names, so the
# model receives named features like the ones it was fitted on.
# NOTE(review): values are matched to columns by position, as before -- this
# assumes the CSV column order is age, sex, cp, ..., thal; confirm.
patient = pd.DataFrame(
    [[age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal]],
    columns=X_train.columns,
)

# Stored under its own name: `y` holds the target series used by the EDA cells
# and must not be overwritten by a prediction.
patient_prediction=rf.predict(patient)
print(patient_prediction)

In [None]:
# Test accuracy of the random forest as a percentage (accuracy_score takes
# y_true first, then y_pred).
score_rf = round(accuracy_score(Y_test,Y_pred_rf)*100,2)

# Label corrected: this score belongs to the Random Forest model.
print("The accuracy score achieved using Random Forest is: "+str(score_rf)+" %")

In [None]:
#XGBoost
import xgboost as xgb

xgb_model = xgb.XGBClassifier(objective="binary:logistic", random_state=42)
xgb_model.fit(X_train, Y_train)

Y_pred_xgb = xgb_model.predict(X_test)
Y_pred_xgb.shape

In [None]:
score_xgb = round(accuracy_score(Y_pred_xgb,Y_test)*100,2)

print("The accuracy score achieved using XGBoost is: "+str(score_xgb)+" %")

In [None]:
#Output final score
# Model names and their accuracy percentages, kept in matching order.
algorithms = [
    "Logistic Regression",
    "Naive Bayes",
    "Support Vector Machine",
    "K-Nearest Neighbors",
    "Decision Tree",
    "Random Forest",
    "XGBoost",
]
scores = [score_lr, score_nb, score_svm, score_knn, score_dt, score_rf, score_xgb]

# One summary line per model.
for algorithm_name, algorithm_score in zip(algorithms, scores):
    print("The accuracy score achieved using "+algorithm_name+" is: "+str(algorithm_score)+" %")

In [None]:
# Compare the accuracy of all models in one wide bar chart.
sns.set(rc={'figure.figsize':(15,8)})

# x / y must be passed by keyword on seaborn >= 0.12.
ax = sns.barplot(x=algorithms, y=scores)

# Label the axes on the returned Axes, after plotting, so the labels belong to
# this chart and cannot be reset by the plotting call.
ax.set_xlabel("Algorithms")
ax.set_ylabel("Accuracy score")
plt.show()

In [None]:
# Disabled experiment kept as a triple-quoted string: none of it is executed.
# As the only expression in the cell, the string itself becomes the cell's value.
# NOTE(review): inside the disabled code the SVM is fitted on X_train, while the
# single prediction is made on scaler.transform(...) output -- confirm which
# feature scaling is intended before re-enabling it.
'''#Support Vector Machine Model(SVM)
model=svm.SVC(kernel='linear')   

#Training the svm model with training data
model.fit(X_train,Y_train)

#Accuracy Score
#Accuracy Score of training data
X_train_rf=model.predict(X_train)
training_data_accuracy=accuracy_score(Y_train,X_train_rf)

#print("Accuracy score of training data: ",training_data_accuracy)
#Accuracy Score of testing data
X_test_prediction=model.predict(X_test)
test_data_accuracy=accuracy_score(Y_test,X_test_prediction)
#print("Accuracy score of test data: ",test_data_accuracy)

input_data=(57,1,0,140,192,0,1,148,0,0.4,1,0,1)
#changing input data to a numpy array
input_data_as_numpy_array= np.asarray(input_data)

#reshape the numpy array
input_data_reshaped= input_data_as_numpy_array.reshape(1,-1)

#standarize the data
std_data= scaler.transform(input_data_reshaped)

prediction= model.predict(std_data)
print(prediction)

if(prediction[0]==0):
    print("Person is  not having heart attack")
else:
    print("Person is having heart attack")'''