In [173]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

In [162]:
# Load the raw table. The file is space-delimited and has no header row,
# so pandas labels the columns with integers until they are renamed.
heart_df = pd.read_csv(
    "heart.dat",
    header=None,
    sep=" ",
)
heart_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,70.0,1.0,4.0,130.0,322.0,0.0,2.0,109.0,0.0,2.4,2.0,3.0,3.0,2
1,67.0,0.0,3.0,115.0,564.0,0.0,2.0,160.0,0.0,1.6,2.0,0.0,7.0,1
2,57.0,1.0,2.0,124.0,261.0,0.0,0.0,141.0,0.0,0.3,1.0,0.0,7.0,2
3,64.0,1.0,4.0,128.0,263.0,0.0,0.0,105.0,1.0,0.2,2.0,1.0,7.0,1
4,74.0,0.0,2.0,120.0,269.0,0.0,2.0,121.0,1.0,0.2,1.0,1.0,3.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52.0,1.0,3.0,172.0,199.0,1.0,0.0,162.0,0.0,0.5,1.0,0.0,7.0,1
266,44.0,1.0,2.0,120.0,263.0,0.0,0.0,173.0,0.0,0.0,1.0,0.0,7.0,1
267,56.0,0.0,2.0,140.0,294.0,0.0,2.0,153.0,0.0,1.3,2.0,0.0,3.0,1
268,57.0,1.0,4.0,140.0,192.0,0.0,0.0,148.0,0.0,0.4,2.0,0.0,6.0,1


In [163]:
# Replace the integer column labels with descriptive names, in file order.
# The last entry is the target column that later cells select by name.
# Labels carry no leading/trailing whitespace so they can be looked up exactly as written.
heart_df.columns = [
    "age",
    "sex",
    "chest pain type (4 values)",
    "resting blood pressure",
    "serum cholesterol in mg/dl",
    "fasting blood sugar > 120 mg/dl",
    "resting electrocardiographic results (values 0,1,2)",
    "maximum heart rate achieved",
    "exercise induced angina",
    "oldpeak = ST depression induced by exercise relative to rest",
    "the slope of the peak exercise ST segment",
    "number of major vessels (0-3) colored by flourosopy",
    "thal: 3 = normal; 6 = fixed defect; 7 = reversable defect",
    "Absence (1) or presence (2)",
]

In [164]:
# Preview the first five rows to confirm the new column names were applied.
heart_df.head(n=5)

Unnamed: 0,age,sex,chest pain type (4 values),resting blood pressure,serum cholesterol in mg/dl,fasting blood sugar > 120 mg/dl,"resting electrocardiographic results (values 0,1,2)",maximum heart rate achieved,exercise induced angina,oldpeak = ST depression induced by exercise relative to rest,the slope of the peak exercise ST segment,number of major vessels (0-3) colored by flourosopy,thal: 3 = normal; 6 = fixed defect; 7 = reversable defect,Absence (1) or presence (2)
0,70.0,1.0,4.0,130.0,322.0,0.0,2.0,109.0,0.0,2.4,2.0,3.0,3.0,2
1,67.0,0.0,3.0,115.0,564.0,0.0,2.0,160.0,0.0,1.6,2.0,0.0,7.0,1
2,57.0,1.0,2.0,124.0,261.0,0.0,0.0,141.0,0.0,0.3,1.0,0.0,7.0,2
3,64.0,1.0,4.0,128.0,263.0,0.0,0.0,105.0,1.0,0.2,2.0,1.0,7.0,1
4,74.0,0.0,2.0,120.0,269.0,0.0,2.0,121.0,1.0,0.2,1.0,1.0,3.0,1


In [165]:
# Report the (rows, columns) of the full table, target column included.
# There are 270 observations, so the neural network's input data will have
# shape 270 x 13 once the target variable is excluded.
heart_df.shape

(270, 14)

In [166]:
# A neural network expects every feature to be numeric and free of missing values,
# so start by counting the missing entries in each column.
heart_df.isna().sum(axis=0)

age                                                             0
sex                                                             0
chest pain type (4 values)                                      0
resting blood pressure                                          0
serum cholesterol in mg/dl                                      0
fasting blood sugar > 120 mg/dl                                 0
 resting electrocardiographic results (values 0,1,2)            0
maximum heart rate achieved                                     0
exercise induced angina                                         0
oldpeak = ST depression induced by exercise relative to rest    0
the slope of the peak exercise ST segment                       0
number of major vessels (0-3) colored by flourosopy             0
thal: 3 = normal; 6 = fixed defect; 7 = reversable defect       0
Absence (1) or presence (2)                                     0
dtype: int64

In [167]:
# List the dtype of every column to confirm that all of them are numeric.
heart_df.dtypes

age                                                             float64
sex                                                             float64
chest pain type (4 values)                                      float64
resting blood pressure                                          float64
serum cholesterol in mg/dl                                      float64
fasting blood sugar > 120 mg/dl                                 float64
 resting electrocardiographic results (values 0,1,2)            float64
maximum heart rate achieved                                     float64
exercise induced angina                                         float64
oldpeak = ST depression induced by exercise relative to rest    float64
the slope of the peak exercise ST segment                       float64
number of major vessels (0-3) colored by flourosopy             float64
thal: 3 = normal; 6 = fixed defect; 7 = reversable defect       float64
Absence (1) or presence (2)                                     int64
dtype: object

In [168]:
# There are no missing values in the dataset, and all features are numeric.
# Next, separate the target from the features; later cells split the data into
# train and test sets and then standardize it.
#
# y_label is recoded in place by a later cell, so take an explicit copy:
# `.values` may hand back a view of the frame's data (read-only under pandas
# Copy-on-Write), whereas `to_numpy(copy=True)` always yields an independent,
# writable array. The double brackets keep the result 2-D, i.e. one column.
y_label = heart_df[['Absence (1) or presence (2)']].to_numpy(copy=True)

# The first 13 columns are the features; the 14th (selected above) is the target.
X_label = heart_df.iloc[:, 0:13]
X_label

Unnamed: 0,age,sex,chest pain type (4 values),resting blood pressure,serum cholesterol in mg/dl,fasting blood sugar > 120 mg/dl,"resting electrocardiographic results (values 0,1,2)",maximum heart rate achieved,exercise induced angina,oldpeak = ST depression induced by exercise relative to rest,the slope of the peak exercise ST segment,number of major vessels (0-3) colored by flourosopy,thal: 3 = normal; 6 = fixed defect; 7 = reversable defect
0,70.0,1.0,4.0,130.0,322.0,0.0,2.0,109.0,0.0,2.4,2.0,3.0,3.0
1,67.0,0.0,3.0,115.0,564.0,0.0,2.0,160.0,0.0,1.6,2.0,0.0,7.0
2,57.0,1.0,2.0,124.0,261.0,0.0,0.0,141.0,0.0,0.3,1.0,0.0,7.0
3,64.0,1.0,4.0,128.0,263.0,0.0,0.0,105.0,1.0,0.2,2.0,1.0,7.0
4,74.0,0.0,2.0,120.0,269.0,0.0,2.0,121.0,1.0,0.2,1.0,1.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52.0,1.0,3.0,172.0,199.0,1.0,0.0,162.0,0.0,0.5,1.0,0.0,7.0
266,44.0,1.0,2.0,120.0,263.0,0.0,0.0,173.0,0.0,0.0,1.0,0.0,7.0
267,56.0,0.0,2.0,140.0,294.0,0.0,2.0,153.0,0.0,1.3,2.0,0.0,3.0
268,57.0,1.0,4.0,140.0,192.0,0.0,0.0,148.0,0.0,0.4,2.0,0.0,6.0


In [169]:
# With the target separated from the features, recode its classes from
# {1: absence, 2: presence} to {0: absence, 1: presence}.
#
# The labels are re-read from heart_df each time so that this cell is idempotent:
# recoding the existing y_label in place would, on a second run, turn the former
# "presence" rows (now 1) into 0 and leave every label at 0.
y_label = heart_df[['Absence (1) or presence (2)']].to_numpy(copy=True)

# Compute both masks before writing anything, so the second lookup is not
# affected by the first assignment.
where_absence = np.where(y_label == 1)
where_presence = np.where(y_label == 2)
y_label[where_absence] = 0
y_label[where_presence] = 1
y_label.shape

(270, 1)

In [170]:
# Use sklearn's train_test_split to split the data into a train and a test set,
# with the test set taking 20 percent of the rows.
# random_state pins the shuffle so the same split (and therefore the same
# downstream results) is produced on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X_label,
    y_label,
    test_size=0.2,
    random_state=42,
)

In [176]:
# Finally, standardize the features with sklearn's StandardScaler.
# The scaler is fitted on the training set only; the test set is then transformed
# with those same training statistics. Re-fitting on the test set would scale the
# two sets differently and leak test-set information into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# The neural network will have one hidden layer and a final output layer.
# The input layer will have 13 nodes because there are 13 features, excluding the target.
# The hidden layer can have any number of nodes, but you’ll start with 8, and the final layer,
# which makes the predictions, will have 1 node.
# Think of a weight as a measure of how strongly a feature contributes to a prediction,
# and of the bias as a base value that your predictions start from.
