### Importing the required libraries

By: Ashish Anand

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import warnings
warnings.filterwarnings('ignore')
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

Data Collection and Processing

In [2]:
# Read the heart-disease table from the CSV file into a DataFrame.
csv_path = 'heart.csv'
heart_data = pd.read_csv(csv_path)


In [3]:
# Preview the first five records of the loaded table.
heart_data.head(n=5)

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,Target
0,63,1,typical,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2,reversable,1
3,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,nontypical,130,204,0,2,172,0,1.4,1,0,normal,0


In [4]:
# Dimensions of the loaded table as (rows, columns).
heart_data.shape

(303, 14)

In [5]:
# Print each column's name, non-null count and dtype.
heart_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Age        303 non-null    int64  
 1   Sex        303 non-null    int64  
 2   ChestPain  303 non-null    object 
 3   RestBP     303 non-null    int64  
 4   Chol       303 non-null    int64  
 5   Fbs        303 non-null    int64  
 6   RestECG    303 non-null    int64  
 7   MaxHR      303 non-null    int64  
 8   ExAng      303 non-null    int64  
 9   Oldpeak    303 non-null    float64
 10  Slope      303 non-null    int64  
 11  Ca         303 non-null    int64  
 12  Thal       301 non-null    object 
 13  Target     303 non-null    int64  
dtypes: float64(1), int64(11), object(2)
memory usage: 33.3+ KB


In [6]:
# Count the missing entries in each column.
heart_data.isna().sum()

Age          0
Sex          0
ChestPain    0
RestBP       0
Chol         0
Fbs          0
RestECG      0
MaxHR        0
ExAng        0
Oldpeak      0
Slope        0
Ca           0
Thal         2
Target       0
dtype: int64

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
heart_data.describe()

Unnamed: 0,Age,Sex,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.438944,0.679868,131.689769,246.693069,0.148515,0.990099,149.607261,0.326733,1.039604,1.60066,0.686469,0.458746
std,9.038662,0.467299,17.599748,51.776918,0.356198,0.994971,22.875003,0.469794,1.161075,0.616226,0.947661,0.49912
min,29.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,1.0,0.0,0.0
25%,48.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,0.0
50%,56.0,1.0,130.0,241.0,0.0,1.0,153.0,0.0,0.8,2.0,0.0,0.0
75%,61.0,1.0,140.0,275.0,0.0,2.0,166.0,1.0,1.6,2.0,1.0,1.0
max,77.0,1.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,3.0,3.0,1.0


In [8]:
# Class balance: how many records fall under each Target value.
target_column = heart_data['Target']
target_column.value_counts()

0    164
1    139
Name: Target, dtype: int64

Splitting the Features and Target

In [9]:
# Separate the predictors (every column except Target) from the label.
# `columns=` alone identifies what to drop; no `axis` argument is needed.
X = heart_data.drop(columns='Target')
Y = heart_data['Target']

In [10]:
# Preview the first five rows of the feature table.
X.head(n=5)

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal
0,63,1,typical,145,233,1,2,150,0,2.3,3,0,fixed
1,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3,normal
2,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2,reversable
3,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0,normal
4,41,0,nontypical,130,204,0,2,172,0,1.4,1,0,normal


In [11]:
# Preview the first five labels.
Y.head(n=5)

0    0
1    1
2    1
3    0
4    0
Name: Target, dtype: int64

In [12]:
# Checking the datatypes of the feature columns; object-typed columns hold
# strings rather than numbers.
X.dtypes

Age            int64
Sex            int64
ChestPain     object
RestBP         int64
Chol           int64
Fbs            int64
RestECG        int64
MaxHR          int64
ExAng          int64
Oldpeak      float64
Slope          int64
Ca             int64
Thal          object
dtype: object

In [13]:
 # Distinct category labels present in the ChestPain column.
 X['ChestPain'].unique()

array(['typical', 'asymptomatic', 'nonanginal', 'nontypical'],
      dtype=object)

In [14]:
# Fit a LabelEncoder on the ChestPain strings, then replace the column with
# the integer code assigned to each label.
label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(X['ChestPain'])
X['ChestPain'] = label_encoder.transform(X['ChestPain'])

In [15]:
# Checking after label encoding: the column now holds integer codes
# instead of the original strings.
X['ChestPain'].unique()

array([3, 0, 1, 2])

In [16]:
# Re-check the column dtypes after encoding ChestPain.
X.dtypes

Age            int64
Sex            int64
ChestPain      int32
RestBP         int64
Chol           int64
Fbs            int64
RestECG        int64
MaxHR          int64
ExAng          int64
Oldpeak      float64
Slope          int64
Ca             int64
Thal          object
dtype: object

In [17]:
# Checking the distinct values of Thal, including any missing-value marker.
X['Thal'].unique()

array(['fixed', 'normal', 'reversable', nan], dtype=object)

In [18]:
# Label encoding the Thal column with its own encoder, so the ChestPain
# mapping held by `label_encoder` is not overwritten by a second fit and
# both mappings stay available (e.g. for inverse_transform).
# NOTE(review): Thal contains missing values; they are encoded as an extra
# category here rather than imputed or dropped — confirm this is intended.
thal_encoder = preprocessing.LabelEncoder()
X['Thal'] = thal_encoder.fit_transform(X['Thal'])

In [19]:
# Checking Thal after label encoding: the column now holds integer codes.
X['Thal'].unique()

array([0, 1, 2, 3])

Splitting the Data into Training data & Test Data

In [20]:
# Hold out 20% of the rows for testing. Stratifying on Y keeps the class
# ratio the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
    stratify=Y,
)

In [21]:
# Shapes of the full, training and test feature tables, in that order.
print(*(features.shape for features in (X, X_train, X_test)))

(303, 13) (242, 13) (61, 13)


Model Training

Logistic Regression

In [22]:
# Logistic regression classifier created with scikit-learn's default settings.
# NOTE(review): warnings are silenced globally in the import cell, so a
# convergence warning raised while fitting would not be shown — confirm the
# default settings converge on these unscaled features.
model = LogisticRegression()

In [23]:
# Training the LogisticRegression model with the training data.
# fit() is the last expression of the cell, so its return value is displayed.
model.fit(X_train, Y_train)

LogisticRegression()

Model Evaluation

Accuracy Score

In [24]:
# Accuracy on the training data.
# accuracy_score expects (y_true, y_pred): ground-truth labels first,
# model predictions second.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [25]:
# Report the share of training rows the model classifies correctly.
train_accuracy_label = 'Accuracy on Training data : '
print(train_accuracy_label, training_data_accuracy)

Accuracy on Training data :  0.8677685950413223


In [26]:
# Accuracy on the held-out test data.
# accuracy_score expects (y_true, y_pred): ground-truth labels first,
# model predictions second.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [27]:
# Report the share of held-out test rows the model classifies correctly.
test_accuracy_label = 'Accuracy on Test data : '
print(test_accuracy_label, test_data_accuracy)

Accuracy on Test data :  0.8524590163934426


In [28]:
# Predict for one hand-written patient record. The values follow the column
# order of X and use the integer codes produced by the label encoding of
# ChestPain and Thal. The tuple is named `input_data` so that Python's
# built-in input() is not shadowed.
# NOTE(review): Age=80 and Slope=0 fall outside the ranges reported by
# describe(), and Thal=3 appears to be the code that missing Thal values
# received — confirm this sample is intentional.
input_data = (80, 1, 0, 132, 268, 0, 0, 160, 0, 3.6, 0, 2, 3)

# A one-row DataFrame carrying the training column names keeps the features
# aligned by name with what the model was fitted on (a bare ndarray has no
# feature names).
input_frame = pd.DataFrame([input_data], columns=X.columns)

prediction = model.predict(input_frame)

# Class 0 is reported as healthy; any other predicted class as disease.
if prediction[0] == 0:
    print('No Heart Disease')
else:
    print('The Person has Heart Disease')

The Person has Heart Disease
