In [1]:
import pandas as pd
import numpy as nm
from sklearn.model_selection import train_test_split # for splitting the dataset into training and testing part
from sklearn import metrics # For confusion matrix
import matplotlib.pyplot as plt

In [2]:
# Read Heart.csv (a path relative to the notebook's working directory) into a DataFrame.
data = pd.read_csv("Heart.csv")

In [3]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
data.shape

(303, 15)

In [4]:
# Report the number of null entries per column, one aligned "name  count" row each.
# (Evaluating `data.isnull().sum()` on its own shows the same counts as a Series.)
for column, n_missing in data.isnull().sum().items():
    print(f"{column : <20}{n_missing : >10}")

Unnamed: 0                   0
Age                          0
Sex                          0
ChestPain                    0
RestBP                       0
Chol                         0
Fbs                          0
RestECG                      0
MaxHR                        0
ExAng                        0
Oldpeak                      0
Slope                        0
Ca                           4
Thal                         2
AHD                          0


In [5]:
# Per-column dtypes; the `object` columns are the ones holding string values.
data.dtypes

Unnamed: 0      int64
Age             int64
Sex             int64
ChestPain      object
RestBP          int64
Chol            int64
Fbs             int64
RestECG         int64
MaxHR           int64
ExAng           int64
Oldpeak       float64
Slope           int64
Ca            float64
Thal           object
AHD            object
dtype: object

In [6]:
# Count the entries equal to 0 in every column and print one aligned row per column.
# (Evaluating `(data == 0).sum()` on its own shows the same counts as a Series.)
zero_counts = data.eq(0).sum()
for column in zero_counts.index:
    print(f"{column : <20}{zero_counts[column] : >10}")

Unnamed: 0                   0
Age                          0
Sex                         97
ChestPain                    0
RestBP                       0
Chol                         0
Fbs                        258
RestECG                    151
MaxHR                        0
ExAng                      204
Oldpeak                     99
Slope                        0
Ca                         176
Thal                         0
AHD                          0


In [7]:
# Arithmetic mean of the Age column across all rows.
data["Age"].mean()

54.43894389438944

In [8]:
# Take a five-column subset (Age, Sex, ChestPain, RestBP, Chol) of the full frame
# and display it as the cell's result.
new_data = data[["Age", "Sex", "ChestPain", "RestBP", "Chol"]]
new_data

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol
0,63,1,typical,145,233
1,67,1,asymptomatic,160,286
2,67,1,asymptomatic,120,229
3,37,1,nonanginal,130,250
4,41,0,nontypical,130,204
...,...,...,...,...,...
298,45,1,typical,110,264
299,68,1,asymptomatic,144,193
300,57,1,asymptomatic,130,131
301,57,0,nontypical,130,236


In [9]:
# Randomly split the five-column subset into 75% train / 25% test rows.
# random_state pins the shuffle so the same split is produced on every run.
train, test = train_test_split(new_data, test_size = 0.25, random_state = 672)

In [10]:
# Show the training partition's dimensions, then the partition itself.
# A bare `train.shape` on a non-final line is evaluated and thrown away, because
# the notebook only echoes the last expression of a cell; print it explicitly so
# the (rows, columns) tuple actually appears above the frame.
print(train.shape)
train

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol
208,55,1,nontypical,130,262
299,68,1,asymptomatic,144,193
55,54,1,asymptomatic,124,266
262,60,0,typical,150,240
211,38,1,typical,120,231
...,...,...,...,...,...
60,51,0,asymptomatic,130,305
255,42,0,nonanginal,120,209
165,57,1,asymptomatic,132,207
177,56,1,asymptomatic,132,184


In [11]:
# Show the test partition's dimensions, then the partition itself.
# A bare `test.shape` on a non-final line is evaluated and thrown away, because
# the notebook only echoes the last expression of a cell; print it explicitly so
# the (rows, columns) tuple actually appears above the frame.
print(test.shape)
test

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol
32,64,1,nonanginal,140,335
293,63,1,asymptomatic,140,187
54,60,1,asymptomatic,130,253
203,64,0,nonanginal,140,313
102,57,0,asymptomatic,128,303
...,...,...,...,...,...
237,46,1,asymptomatic,120,249
182,42,1,typical,148,244
169,45,0,nontypical,112,160
186,42,1,nonanginal,120,240


In [12]:
# Feature frame: every column except "Unnamed: 0" and "AHD" (AHD is used as the
# label series Y in the following cell). "Unnamed: 0" appears to be a row index
# saved into the CSV — TODO confirm.
# NOTE(review): X still contains string columns (ChestPain, Thal) and missing
# values (Ca, Thal) per the checks above; presumably these need encoding and
# imputation before a scikit-learn estimator is fitted — confirm downstream.
X = data.drop(["Unnamed: 0", "AHD"], axis = "columns")
X

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal
0,63,1,typical,145,233,1,2,150,0,2.3,3,0.0,fixed
1,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3.0,normal
2,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2.0,reversable
3,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0.0,normal
4,41,0,nontypical,130,204,0,2,172,0,1.4,1,0.0,normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,45,1,typical,110,264,0,0,132,0,1.2,2,0.0,reversable
299,68,1,asymptomatic,144,193,1,0,141,0,3.4,2,2.0,reversable
300,57,1,asymptomatic,130,131,0,0,115,1,1.2,2,1.0,reversable
301,57,0,nontypical,130,236,0,2,174,0,0.0,2,1.0,normal


In [13]:
# Label series: the AHD column, which the displayed output shows as "Yes"/"No" strings.
Y = data["AHD"]
Y

0       No
1      Yes
2      Yes
3       No
4       No
      ... 
298    Yes
299    Yes
300    Yes
301    Yes
302     No
Name: AHD, Length: 303, dtype: object

In [14]:
# Split features and labels in one call so their rows stay aligned:
# 25% of the rows are held out for testing, and random_state=0 makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

In [15]:
# Display the held-out (test) feature rows.
X_test

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal
225,34,0,nontypical,118,210,0,0,192,0,0.7,1,0.0,normal
152,67,0,nonanginal,115,564,0,2,160,0,1.6,2,0.0,reversable
228,54,1,asymptomatic,110,206,0,2,108,1,0.0,2,1.0,normal
201,64,0,asymptomatic,180,325,0,0,154,1,0.0,1,0.0,normal
52,44,1,asymptomatic,112,290,0,2,153,0,0.0,1,1.0,normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...
46,51,1,nonanginal,110,175,0,0,123,0,0.6,1,0.0,normal
160,46,1,nontypical,101,197,1,0,156,0,0.0,1,0.0,reversable
232,49,1,nonanginal,118,149,0,2,126,0,0.8,1,3.0,normal
181,56,0,asymptomatic,134,409,0,2,150,1,1.9,2,2.0,reversable


In [16]:
# Display the training feature rows (the output shows rows with a missing Ca value).
X_train

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal
173,62,0,asymptomatic,140,394,0,2,157,0,1.2,2,0.0,normal
261,58,0,nontypical,136,319,1,2,152,0,0.0,1,2.0,normal
37,57,1,asymptomatic,150,276,0,2,112,1,0.6,2,1.0,fixed
101,34,1,typical,118,182,0,2,174,0,0.0,1,0.0,normal
166,52,1,nonanginal,138,223,0,0,169,0,0.0,1,,normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,58,1,asymptomatic,146,218,0,0,105,0,2.0,2,1.0,reversable
192,43,1,asymptomatic,132,247,1,2,143,1,0.1,2,,reversable
117,35,0,asymptomatic,138,183,0,0,182,0,1.4,1,0.0,normal
47,50,1,asymptomatic,150,243,0,2,128,0,2.6,2,0.0,reversable
