In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split

In [None]:
# Read the heart-disease records from the CSV in the working directory.
df = pd.read_csv('heart.csv')

# Peek at the first five rows to sanity-check the columns and values.
df.head(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


Now, let me explain the feature names:

age: The age of the patient in years.

sex: The gender of the patient. Usually, 1 represents male and 0 represents female.

cp (Chest Pain type): The type of chest pain experienced by the patient. It is usually categorized into:

0: Typical angina
1: Atypical angina
2: Non-anginal pain
3: Asymptomatic
trestbps (Resting Blood Pressure): The patient’s resting blood pressure in mm Hg (millimeters of mercury).

chol (Serum Cholesterol): The patient’s cholesterol level in mg/dl (milligrams per deciliter).

fbs (Fasting Blood Sugar): Indicates if the patient’s fasting blood sugar is greater than 120 mg/dl.

1: True
0: False
restecg (Resting Electrocardiographic Results): Results from the patient’s resting ECG. It can have values like:

0: Normal
1: Having ST-T wave abnormality (e.g., T wave inversions and/or ST elevation or depression of > 0.05 mV)
2: Showing probable or definite left ventricular hypertrophy by Estes' criteria
thalach (Maximum Heart Rate Achieved): The maximum heart rate achieved by the patient during the test.

exang (Exercise-Induced Angina): Indicates whether the patient experienced angina during exercise.

1: Yes
0: No
oldpeak: ST depression induced by exercise relative to rest.

slope (Slope of the Peak Exercise ST Segment): Describes the slope of the peak exercise ST segment. It can have values like:

0: Upsloping
1: Flat
2: Downsloping
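ca (Number of Major Vessels Colored by Fluoroscopy): The number of major vessels (0-3) colored by fluoroscopy. Note that in this file the column's maximum is 4 (see the `df.describe()` output), so values outside the documented 0-3 range are present.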

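thal: A blood disorder called thalassemia. The documented values are listed below; this file also contains 0 (the column's minimum in the `df.describe()` output), which is not among them: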

1: Normal
2: Fixed defect (no reversible defect)
3: Reversible defect (shows some sort of reversible defect)
target: The presence of heart disease in the patient (as explained above):

1: Heart disease
0: No heart disease

In [None]:
# Dimensions of the loaded frame as (n_rows, n_columns).
df.shape

(1025, 14)

In [None]:
# Count missing values per column (isna is the canonical name for isnull).
df.isna().sum()

Unnamed: 0,0
age,0
sex,0
cp,0
trestbps,0
chol,0
fbs,0
restecg,0
thalach,0
exang,0
oldpeak,0


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0
mean,54.434146,0.69561,0.942439,131.611707,246.0,0.149268,0.529756,149.114146,0.336585,1.071512,1.385366,0.754146,2.323902,0.513171
std,9.07229,0.460373,1.029641,17.516718,51.59251,0.356527,0.527878,23.005724,0.472772,1.175053,0.617755,1.030798,0.62066,0.50007
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,48.0,0.0,0.0,120.0,211.0,0.0,0.0,132.0,0.0,0.0,1.0,0.0,2.0,0.0
50%,56.0,1.0,1.0,130.0,240.0,0.0,1.0,152.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,275.0,0.0,1.0,166.0,1.0,1.8,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


In [None]:
# Class balance: how many rows carry each target label.
df.loc[:, 'target'].value_counts()

Unnamed: 0_level_0,count
target,Unnamed: 1_level_1
1,526
0,499


In [None]:
# Separate the label column from the predictor columns.
y = df['target']
X = df.drop(columns=['target'])

Splitting the data into training and test sets


In [None]:
# Hold out 15% of the rows for testing. Stratifying on y keeps the class
# ratio the same in both partitions, and the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    stratify=y,
    random_state=42,
)

In [None]:
# Show the shapes of the full, training and test feature matrices on one line.
print(*(features.shape for features in (X, X_train, X_test)))

(1025, 13) (871, 13) (154, 13)


In [None]:
# Model training
#
# The features are on very different scales (chol is in the hundreds while
# oldpeak is in single digits), so the solver needs more than the default
# 100 iterations. Raising max_iter lets the fit run to convergence instead
# of stopping early with a ConvergenceWarning.
model = LogisticRegression(max_iter=1000)

model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Accuracy on the held-out test set (the same quantity a classifier's
# .score() method reports).
accuracy_score(y_test, model.predict(X_test))

0.8571428571428571

In [None]:
# Predicted class labels for the held-out test features.
y_pred = model.predict(X_test)

In [None]:
# Recall score
#
# scikit-learn metrics take the ground truth first: recall_score(y_true, y_pred).
# Passing the predictions first would compute precision instead of recall.
score = recall_score(y_test, y_pred, pos_label=1)
score


0.8202247191011236

In [None]:
# Confusion matrix with the ground truth first, so rows are the true labels
# and columns are the predicted labels (scikit-learn's convention).
cm = confusion_matrix(y_test, y_pred)
cm

array([[59,  6],
       [16, 73]])

In [None]:
# Building a heart disease prediction system

# One patient's feature values, in the same order as the training columns
# (age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak,
# slope, ca, thal).
input_data = [71, 0, 0, 112, 149, 0, 1, 125, 0, 1.6, 1, 0, 2]

# Wrap the sample in a one-row DataFrame that carries the training column
# names. The model was fitted on a DataFrame, so predicting on a bare NumPy
# array makes recent scikit-learn versions warn that X has no valid feature
# names.
input_frame = pd.DataFrame([input_data], columns=X.columns)

prediction = model.predict(input_frame)

# predict returns a length-1 array; compare its single element explicitly
# rather than relying on the truthiness of a one-element array.
if prediction[0] == 1:
  print('The person has Heart Disease')
else:
  print('The person does not have Heart Disease')

The person has Heart Disease


