In [2]:
import pandas as pd

In [3]:
heartDiseaseData=pd.read_csv('HeartDisease.csv')
heartDiseaseData.head()

Unnamed: 0,age,gender,chest_pain,rest_bps,cholestrol,fasting_blood_sugar,rest_ecg,thalach,exer_angina,old_peak,slope,ca,thalassemia,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


# Train and Test Split

In [4]:
x=heartDiseaseData.drop(['target'],axis=1)
y=heartDiseaseData[['target']]

In [5]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,stratify=y,random_state=100)

In [6]:
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((242, 13), (61, 13), (242, 1), (61, 1))

# Processing of Train Set

# Processing of Categorical Columns

In [7]:
heartTrainCat=x_train.select_dtypes(object)

In [8]:
heartTrainCat.head()

193
157
143
183
246


In [9]:
heartTrainCat.isna().sum()

Series([], dtype: float64)

# Processing of Numerical Columns

In [10]:
import numpy as np
heartTrainNum=x_train.select_dtypes(np.number)

In [11]:
heartTrainNum.head()

Unnamed: 0,age,gender,chest_pain,rest_bps,cholestrol,fasting_blood_sugar,rest_ecg,thalach,exer_angina,old_peak,slope,ca,thalassemia
193,60,1,0,145,282,0,0,142,1,2.8,1,2,3
157,35,1,1,122,192,0,1,174,0,0.0,2,0,2
143,67,0,0,106,223,0,1,142,0,0.3,2,2,2
183,58,1,2,112,230,0,0,165,0,2.5,1,1,3
246,56,0,0,134,409,0,0,150,1,1.9,1,2,3


In [12]:
heartTrainNum.dtypes

age                      int64
gender                   int64
chest_pain               int64
rest_bps                 int64
cholestrol               int64
fasting_blood_sugar      int64
rest_ecg                 int64
thalach                  int64
exer_angina              int64
old_peak               float64
slope                    int64
ca                       int64
thalassemia              int64
dtype: object

In [13]:
heartTrainNum.isna().sum()

age                    0
gender                 0
chest_pain             0
rest_bps               0
cholestrol             0
fasting_blood_sugar    0
rest_ecg               0
thalach                0
exer_angina            0
old_peak               0
slope                  0
ca                     0
thalassemia            0
dtype: int64

In [14]:
from sklearn.preprocessing import StandardScaler
ss=StandardScaler()

In [15]:
ss.fit(heartTrainNum)

StandardScaler()

In [16]:
heartTrainNumScaled=ss.transform(heartTrainNum)

In [20]:
trainDataFinal=heartTrainNumScaled.copy()

# Building a model

In [21]:
# create Classes of all the algorithms
from sklearn.tree import DecisionTreeClassifier

In [22]:
dt=DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None, min_samples_split=2, 
                          min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, 
                          min_impurity_decrease=0.0, min_impurity_split=None, class_weight=None, ccp_alpha=0.0)

In [23]:
y_train.value_counts()

target
1         132
0         110
dtype: int64

In [24]:
dt.fit(trainDataFinal,y_train)

DecisionTreeClassifier()

# Test Set Processing

In [25]:
heartDataTestCat=x_test.select_dtypes(object)
heartDataTestCat.head()

24
174
30
63
180


In [26]:
heartDataTestNum=x_test.select_dtypes(np.number)
heartDataTestNum.head()

Unnamed: 0,age,gender,chest_pain,rest_bps,cholestrol,fasting_blood_sugar,rest_ecg,thalach,exer_angina,old_peak,slope,ca,thalassemia
24,40,1,3,140,199,0,1,178,1,1.4,2,0,3
174,60,1,0,130,206,0,0,132,1,2.4,1,2,3
30,41,0,1,105,198,0,1,168,0,0.0,2,1,2
63,41,1,1,135,203,0,1,132,0,0.0,1,0,1
180,55,1,0,132,353,0,1,132,1,1.2,1,1,3


In [27]:
heartDataTestNum.isna().sum()

age                    0
gender                 0
chest_pain             0
rest_bps               0
cholestrol             0
fasting_blood_sugar    0
rest_ecg               0
thalach                0
exer_angina            0
old_peak               0
slope                  0
ca                     0
thalassemia            0
dtype: int64

In [28]:
heartTestNumScaled=ss.transform(heartDataTestNum)

In [29]:
TestDataFinal=heartTestNumScaled.copy()

# Predictions and Metrics

In [30]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

In [31]:
dtPredicts=dt.predict(TestDataFinal)

# Decison Tree Model Performance

In [32]:
print("the accuracy is",accuracy_score(dtPredicts, y_test))
print("the precision is",precision_score(dtPredicts, y_test))
print("the recall is",recall_score(dtPredicts, y_test))
print("the f1_score is",f1_score(dtPredicts, y_test))

the accuracy is 0.7540983606557377
the precision is 0.7878787878787878
the recall is 0.7647058823529411
the f1_score is 0.7761194029850745


In [33]:
TestDataFinal[0].shape

(13,)

In [34]:
dt.predict(TestDataFinal[0].reshape(1,13))

array([0], dtype=int64)