In [1]:
# HEART DISEASE PREDICTION

**IMPORTING DEPENDENCIES/LIBRARIES**

In [2]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

**IMPORTING OUR DATASET**

In [3]:
heartData = pd.read_csv('D:\PROGRAMMING\python\ML\heart_disease_data.csv')

In [4]:
heartData # No need to clean dataset as everything seems fine 

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,48,1,0,130,256,1,0,150,1,0.0,2,2,3,0
1,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
2,44,0,2,118,242,0,1,149,0,0.3,1,1,2,1
3,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
4,56,1,3,120,193,0,0,162,0,1.9,1,0,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,60,1,2,140,185,0,0,155,0,3.0,1,0,2,0
602,67,0,2,115,564,0,0,160,0,1.6,1,0,3,1
603,40,1,3,140,199,0,1,178,1,1.4,2,0,3,1
604,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1


**REMOVING TARGET COLUMN FROM DATASET**

In [5]:
x = heartData.drop(columns='target', axis=1)
y = heartData['target']

In [6]:
x

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,48,1,0,130,256,1,0,150,1,0.0,2,2,3
1,61,1,0,148,203,0,1,161,0,0.0,2,1,3
2,44,0,2,118,242,0,1,149,0,0.3,1,1,2
3,47,1,0,110,275,0,0,118,1,1.0,1,1,2
4,56,1,3,120,193,0,0,162,0,1.9,1,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,60,1,2,140,185,0,0,155,0,3.0,1,0,2
602,67,0,2,115,564,0,0,160,0,1.6,1,0,3
603,40,1,3,140,199,0,1,178,1,1.4,2,0,3
604,50,0,0,110,254,0,0,159,0,0.0,2,0,2


In [7]:
y

0      0
1      0
2      1
3      0
4      1
      ..
601    0
602    1
603    1
604    1
605    0
Name: target, Length: 606, dtype: int64

**SPLITTING INTO TRAINING AND TESTING DATA**

In [8]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=2)

In [9]:
print(x.shape, x_train.shape, x_test.shape)

(606, 13) (484, 13) (122, 13)


**MODEL TRAINING**

In [10]:
model = LogisticRegression()

In [11]:
model.fit(x_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


**MODEL EVALUATION**

*Accuracy on training data*

In [12]:
x_train_pred = model.predict(x_train)
training_data_accuracy = accuracy_score(x_train_pred, y_train)
print(f"Accuracy on training data: {round(training_data_accuracy, 2)}")

Accuracy on training data: 0.86


*Accuracy on test data*

In [13]:
x_test_pred = model.predict(x_test)
test_data_accuracy = accuracy_score(x_test_pred, y_test)
print(f"Accuracy on training data: {round(test_data_accuracy, 2)}")

Accuracy on training data: 0.84


**BUILDING A PREDICTIVE SYSTEM**

In [14]:
input_data = (44,0,2,118,242,0,1,149,0,0.3,1,1,2) # taking sample input from dataset itself, but this can be done by taking user's input too 
input_data_as_array = np.asarray(input_data)
input_data_reshaped = input_data_as_array.reshape(1, -1)
prediction = model.predict(input_data_reshaped)
if prediction[0] == 1:
    print(f"The person has heart disease.")
else:
    print(f"The person does not has heart disease.")

The person has heart disease.




# SAVING THE MODEL SO WE DON'T NEED TO RUN AGAIN AND AGAIN 

In [15]:
import pickle

In [16]:
fileName = 'heartDiseasePrediction_model.sav'
pickle.dump(model, open(fileName, 'wb'))

In [17]:
loadedModel = pickle.load(open('heartDiseasePrediction_model.sav', 'rb')) 
# NOW WE CAN SIMPLY USE THIS LINE TO LOAD MODEL AS WE HAVE ALREADY SAVED IT USING PICKLE SO NO NEED TO RUN WHOLE THING AGAIN AND AGAIN

In [18]:
input_data = (44,0,2,118,242,0,1,149,0,0.3,1,1,2)
input_data_as_array = np.asarray(input_data)
input_data_reshaped = input_data_as_array.reshape(1, -1)
prediction = loadedModel.predict(input_data_reshaped)
if prediction[0] == 1:
    print(f"The person has heart disease.")
else:
    print(f"The person does not has heart disease.")

The person has heart disease.


