<a href="https://colab.research.google.com/github/DattaNTR/Heart-Disease-Detection/blob/main/Copy_of_Heart_disease_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **HEART DISEASE DETECTION**

In [None]:
# Mount the user's Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

## **I. Importing required libraries**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Show the entries of the current working directory.
print(os.listdir())
# Suppress all warnings for the rest of the session.
# NOTE(review): this also hides library warnings raised by later cells.
warnings.filterwarnings('ignore')

## **II. Importing and understanding the dataset**

In [None]:
# Load the dataset from a CSV file into a pandas DataFrame.
# NOTE(review): the path is hard-coded under /content (not under the mounted
# Drive at /content/drive) — confirm the file is uploaded there before running.
data = pd.read_csv("/content/heart (1).csv")

### Verifying whether it is a 'dataframe' object in pandas or not

In [None]:
# Show the Python type of the object returned by read_csv.
type(data)

### Shape of dataset

In [None]:
# (number of rows, number of columns) of the loaded dataset.
data.shape

### Printing out the first few rows

In [None]:
# Preview the first five rows of the dataset.
data.head(5)

### Description of dataset

In [None]:
# Summary statistics for the columns of the dataset.
data.describe()

In [None]:
# Column names, dtypes and non-null counts.
data.info()

### Understanding our columns much better

In [None]:
# Human-readable description for each feature column, listed in the same
# order as the first len(info) columns of `data`.
info = [
    "age",
    "1: male, 0: female",
    "chest pain type, 1: typical angina, 2: atypical angina, 3: non-anginal pain, 4: asymptomatic",
    "resting blood pressure",
    "serum cholestoral in mg/dl",
    "fasting blood sugar > 120 mg/dl",
    "resting ECG values(0,1,2)",
    "max heartrate achieved",
    "exercise induced angina",
    "oldpeak = ST depression induced by exercise relative to rest",
    "the slope of the peak exercise ST segment",
    "no. of major vessels (0-3) colored by flourosopy",
    "thal: 3 = normal, 6 = fixed defect, 7 = revrsable defect",
]

# Print "<column name>:<tabs><description>", pairing by position.
for column_position, description in enumerate(info):
  print(data.columns[column_position] + ":\t\t" + description)

### Analysing the 'target' variable

In [None]:
# Summary statistics of the label column.
data["target"].describe()

In [None]:
# Distinct values that appear in the label column.
data["target"].unique()

## **III. Exploratory Data Analysis (EDA)**

### Analysing the target variable

In [None]:
# Class balance of the label column: raw counts, then the share of each class.
y = data["target"]
target_temp = data.target.value_counts()
print(target_temp)

# Use the actual row count as the denominator instead of a hard-coded 303,
# so the percentages stay correct if the dataset changes size.
total_patients = len(data)
print("Percentage of patients without heart problems: "+str(round(target_temp[0]*100/total_patients,2)))
print("Percentage of patients with heart problems: "+str(round(target_temp[1]*100/total_patients,2)))

# **IV. Train Test Split**

In [None]:
from sklearn.model_selection import train_test_split

# Features are every column except the label; the label is the "target" column.
X = data.drop(columns="target")
Y = data["target"]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=0,
)

In [None]:
# Dimensions of the training feature matrix.
X_train.shape

In [None]:
# Dimensions of the held-out feature matrix.
X_test.shape

In [None]:
# Number of training labels.
Y_train.shape

In [None]:
# Number of held-out labels.
Y_test.shape

# **V. Model Fitting**

In [None]:
from sklearn.metrics import accuracy_score

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a default logistic regression on the training split (fit returns the estimator).
lr = LogisticRegression().fit(X_train, Y_train)

# Score it on the held-out split, as a percentage rounded to two decimals.
Y_pred_lr = lr.predict(X_test)
score_lr = round(accuracy_score(Y_pred_lr, Y_test) * 100, 2)
print(f"The accuracy score achieved using Logistic Regression is: {score_lr}%")

## Support Vector Machine (SVM)

In [None]:
from sklearn.svm import SVC

# Fit a linear-kernel support vector classifier on the training split.
sv = SVC(kernel='linear')
sv.fit(X_train, Y_train)

# Score it on the held-out split, as a percentage rounded to two decimals.
Y_pred_svm = sv.predict(X_test)
score_svm = round(accuracy_score(Y_pred_svm, Y_test)*100,2)
# Report the SVM score itself (the logistic-regression score was printed here before).
print("The accuracy score achieved using SVM is: "+str(score_svm)+"%")

## K Nearest Neighbors (KNN)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a 7-nearest-neighbours classifier on the training split (fit returns the estimator).
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, Y_train)

# Score it on the held-out split, as a percentage rounded to two decimals.
Y_pred_knn = knn.predict(X_test)
score_knn = round(accuracy_score(Y_pred_knn, Y_test) * 100, 2)
print(f"The accuracy score achieved using KNN is: {score_knn}%")

## Decision Tree

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# Search 2000 random seeds for the decision tree.
# The seed is chosen by 5-fold cross-validated accuracy on the TRAINING split
# only. Choosing it by test-set accuracy would tune the model on the test data
# and make the reported test score optimistically biased.
max_accuracy = 0
best_x = 0  # fallback so the name is always bound, even if no seed beats 0
for x in range(2000):
  dt = DecisionTreeClassifier(random_state=x)
  cv_scores = cross_val_score(dt, X_train, Y_train, cv=5, scoring="accuracy")
  curr_accuracy = round(cv_scores.mean()*100,2)
  if(curr_accuracy > max_accuracy):
    max_accuracy = curr_accuracy
    best_x = x

# Refit with the selected seed on the full training split and evaluate once
# on the untouched test split.
dt = DecisionTreeClassifier(random_state=best_x)
dt.fit(X_train, Y_train)
Y_pred_dt = dt.predict(X_test)
score_dt = round(accuracy_score(Y_pred_dt, Y_test)*100,2)
print("The accuracy score achieved using Decision Tree is: "+str(score_dt)+"%")

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Search 2000 random seeds for the random forest.
# The seed is chosen by out-of-bag accuracy, which is computed from training
# rows each tree did not see. Choosing it by test-set accuracy would tune the
# model on the test data and make the reported test score optimistically biased.
max_accuracy = 0
best_x = 0  # fallback so the name is always bound, even if no seed beats 0
for x in range(2000):
  rf = RandomForestClassifier(random_state=x, oob_score=True)
  rf.fit(X_train, Y_train)
  curr_accuracy = round(rf.oob_score_*100,2)
  if(curr_accuracy > max_accuracy):
    max_accuracy = curr_accuracy
    best_x = x

# Refit with the selected seed and evaluate once on the untouched test split.
# `rf` is the model that later cells use for prediction and persistence.
rf = RandomForestClassifier(random_state=best_x)
rf.fit(X_train, Y_train)
Y_pred_rf = rf.predict(X_test)
score_rf = round(accuracy_score(Y_pred_rf, Y_test)*100,2)
print("The accuracy score achieved using Random Forest is: "+str(score_rf)+"%")

## XGBoost

In [None]:
import xgboost as xgb

# Gradient-boosted classifier with a logistic objective and a fixed seed.
xgb_model = xgb.XGBClassifier(
    objective="binary:logistic",
    random_state=42,
)
xgb_model.fit(X_train, Y_train)

# Score it on the held-out split, as a percentage rounded to two decimals.
Y_pred_xgb = xgb_model.predict(X_test)
score_xgb = round(accuracy_score(Y_pred_xgb, Y_test) * 100, 2)
print(f"The accuracy score achieved using XGBoost is: {score_xgb}%")

## Neural Network

In [None]:
# Install Keras, which the following cells import Sequential and Dense from.
!pip install keras

In [None]:
from keras.models import Sequential
from keras.layers import Dense

In [None]:
# Small feed-forward binary classifier: one hidden ReLU layer of 11 units and
# a single sigmoid output unit.
# The input width is taken from the training matrix rather than hard-coded,
# so the network stays in sync with the feature columns.
n_features = X_train.shape[1]

model = Sequential()
model.add(Dense(11,activation='relu',input_dim=n_features))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [None]:
# Train the network on the training split for 300 epochs.
model.fit(X_train, Y_train, epochs=300)

In [None]:
# Each prediction row holds one sigmoid output; round it to get the class label.
rounded = [round(output_row[0]) for output_row in model.predict(X_test)]
Y_pred_nn = rounded

# Score the labels on the held-out split, as a percentage rounded to two decimals.
score_nn = round(accuracy_score(Y_pred_nn, Y_test) * 100, 2)
print(f"The accuracy score achieved using Neural Network is: {score_nn}%")

# **VI. Output final score**

In [None]:
# Accuracy percentages and display names, kept in matching order.
scores = [score_lr, score_svm, score_knn, score_dt, score_rf, score_xgb, score_nn]
algorithms = [
    "Logistic Regression",
    "SVM",
    "KNN",
    "Decision Tree",
    "Random Forest",
    "XGBoost",
    "Neural Networks",
]

# One summary line per model.
for algorithm_name, algorithm_score in zip(algorithms, scores):
  print(f"The accuracy score achieved using {algorithm_name} is: {algorithm_score}%")

In [None]:
# Bar chart comparing the accuracy of every model on a 10x4 inch figure.
sns.set(rc={'figure.figsize': (10, 4)})
accuracy_axes = sns.barplot(x=algorithms, y=scores)
accuracy_axes.set_xlabel("Algorithms")
accuracy_axes.set_ylabel("Accuracy score")
plt.show()

Therefore, Random Forest gives a good result compared to the other algorithms.

## **VII. Prediction on New Data**

In [None]:
# A single hand-entered patient record, keyed by feature column name.
patient_record = {
    'age': 52,
    'sex': 1,
    'cp': 0,
    'trestbps': 125,
    'chol': 212,
    'fbs': 0,
    'restecg': 1,
    'thalach': 168,
    'exang': 0,
    'oldpeak': 1.0,
    'slope': 2,
    'ca': 2,
    'thal': 3,
}

# One-row frame (row label 0) whose columns follow the insertion order above.
new_data = pd.DataFrame([patient_record], index=[0])

In [None]:
# Display the one-row frame that will be passed to the model.
new_data

In [None]:
# Classify the single record with the fitted random forest; label 0 is
# reported as "No Disease", any other label as "Disease".
p = rf.predict(new_data)
print("No Disease" if p[0] == 0 else "Disease")

## **VIII. Save Model Using Joblib**

In [None]:
import joblib
# Persist the fitted random forest `rf` to trained_model.joblib in the working directory.
joblib.dump(rf,'trained_model.joblib')

In [None]:
from google.colab import files
# Hand the saved model file to the Colab download helper.
files.download('trained_model.joblib')

## **IX. GUI (Graphical User Interface)**

In [None]:
# Install ipywidgets, which the GUI cell below imports as `widgets`.
!pip install ipywidgets

In [None]:
# Install IPython, which the GUI cell below imports `display` from.
!pip install Ipython

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Reload the random forest that was saved to disk with joblib.dump.
# NOTE: this rebinds the notebook-level name `model`, which earlier cells used
# for the Keras network.
model = joblib.load('trained_model.joblib')

# Feature names in the order make_prediction receives them; this is the same
# set and order of columns used for the hand-built `new_data` record.
FEATURE_COLUMNS = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                   'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']

def make_prediction(age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal):
  """Classify one patient with the loaded model and show the result.

  The thirteen arguments are the feature values, in FEATURE_COLUMNS order.
  Nothing is returned; the verdict is written to `result_label`.
  """
  # Pass a named one-row frame so the columns line up with the model by name,
  # not only by position.
  input_data = pd.DataFrame(
      [[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal]],
      columns=FEATURE_COLUMNS,
  )
  prediction = model.predict(input_data)
  if prediction[0] == 1:
    result_label.value = "Heart Disease Detected"
  else:
    result_label.value = "Heart Disease Not Detected"

# Let long descriptions render in full instead of being cut off.
label_style = {'description_width': 'initial'}

# BoundedIntText is used wherever a range is intended: plain IntText has no
# min/max traits, so the limits would not be enforced.
age_input = widgets.FloatText(description='Age: ', style=label_style)
sex_input = widgets.BoundedIntText(min=0, max=1, description='Sex (1 = male, 0 = female): ', style=label_style)
# The column notes list chest pain codes 1-4 while the sample record uses 0,
# so accept 0-4 to cover both encodings.
cp_input = widgets.BoundedIntText(min=0, max=4, description='Chest Pain Type (CP): ', style=label_style)
trestbps_input = widgets.FloatText(description='Resting Blood Pressure: ', style=label_style)
chol_input = widgets.FloatText(description='Cholesterol: ', style=label_style)
fbs_input = widgets.BoundedIntText(min=0, max=1, description='Fasting Blood Sugar > 120 mg/dl: ', style=label_style)
restecg_input = widgets.BoundedIntText(min=0, max=2, description='Resting ECG: ', style=label_style)
thalach_input = widgets.FloatText(description='Max Heart Rate: ', style=label_style)
exang_input = widgets.BoundedIntText(min=0, max=1, description='Exercise Induced Angina: ', style=label_style)
oldpeak_input = widgets.FloatText(description='Oldpeak: ', style=label_style)
slope_input = widgets.BoundedIntText(min=0, max=2, description='Slope: ', style=label_style)
ca_input = widgets.BoundedIntText(min=0, max=3, description='Major Vessels (CA): ', style=label_style)
thal_input = widgets.IntText(description='Thal: ', style=label_style)

result_label = widgets.Label(value="Prediction: ")

# Re-run make_prediction whenever any input changes. The returned output
# widget is kept and displayed so an error raised inside the callback is
# visible instead of being silently discarded.
prediction_output = widgets.interactive_output(
    make_prediction,
    {'age':age_input, 'sex': sex_input, 'cp': cp_input, 'trestbps': trestbps_input,
     'chol': chol_input, 'fbs': fbs_input, 'restecg': restecg_input, 'thalach': thalach_input,
     'exang': exang_input, 'oldpeak': oldpeak_input, 'slope': slope_input, 'ca': ca_input, 'thal': thal_input}
)

display(age_input, sex_input, cp_input, trestbps_input, chol_input, fbs_input, restecg_input, thalach_input, exang_input, oldpeak_input, slope_input,
        ca_input, thal_input)
display(result_label)
display(prediction_output)
