# Neural Network Tutorial

In [64]:
import pandas as pd
from keras import models, layers, optimizers, regularizers
import numpy as np
import random
from sklearn import model_selection

## Prepare the Data

There is nothing new here so we will quickly run through this.

In [65]:
file_name = 'SAheart.data'
data = pd.read_csv(file_name, sep=',', index_col=0)

In [66]:
data['famhist_true'] = data['famhist'] == 'Present'
data['famhist_false'] = data['famhist'] == 'Absent'
data = data.drop(['famhist'], axis=1)
data.head()

Unnamed: 0_level_0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age,chd,famhist_true,famhist_false
row.names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,160,12.0,5.73,23.11,49,25.3,97.2,52,1,True,False
2,144,0.01,4.41,28.61,55,28.87,2.06,63,1,False,True
3,118,0.08,3.48,32.28,52,29.14,3.81,46,0,True,False
4,170,7.5,6.41,38.03,51,31.99,24.26,58,1,True,False
5,134,13.6,3.5,27.78,60,25.99,57.34,49,1,True,False


In [67]:
def get_splits(data):
    np.random.seed(42)
    random.seed(42)
    train, test = model_selection.train_test_split(data)
    x_train = train.loc[:, train.columns != 'chd']
    y_train = train['chd']
    x_test = test.loc[:, test.columns != 'chd']
    y_test = test['chd']
    return x_train, y_train, x_test, y_test

In [68]:
x_train, y_train, x_test, y_test = get_splits(data)

# Create a Baseline Model

In [158]:
np.random.seed(42)
model = models.Sequential()
#model.add(layers.Dense(input_dim=10,
#                       units=10, 
#                       activation='sigmoid',
#                       kernel_regularizer=regularizers.l2(0.1)))
model.add(layers.Dense(input_dim=10,
                       units=1,
                       activation='sigmoid',                         
                       #kernel_regularizer=regularizers.l2(0.0001)
                      ))

In [159]:
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(lr=1),
              metrics=['accuracy'])

In [161]:
class_weight = {
    0: 302 / 462,
    1: 160 / 462
}
model.fit(x_train, y_train, epochs=5, batch_size=1, class_weight=class_weight)
train_acc = model.evaluate(x_train, y_train, batch_size=32)[1]
test_acc = model.evaluate(x_test, y_test, batch_size=32)[1]
print('Training accuracy: %s' % train_acc)
print('Testing accuracy: %s' % test_acc)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training accuracy: 0.3439306363549536
Testing accuracy: 0.3534482779174015


In [107]:
len(data)

462

In [110]:
len(data[data['chd'] == True])

160

In [111]:
462 - 160

302

In [130]:
302/462

0.6536796536796536

In [134]:
model.predict(x_test)

array([[0.47871232],
       [0.47871232],
       [0.47948888],
       [0.47871232],
       [0.47871226],
       [0.47875473],
       [0.47871232],
       [0.4787124 ],
       [0.47871232],
       [0.47871232],
       [0.47871232],
       [0.45593017],
       [0.47871232],
       [0.47809362],
       [0.47871232],
       [0.47288302],
       [0.47871232],
       [0.47872707],
       [0.472862  ],
       [0.35813227],
       [0.47871232],
       [0.47871232],
       [0.47871232],
       [0.4782905 ],
       [0.47869495],
       [0.47870964],
       [0.47871232],
       [0.47871232],
       [0.47868913],
       [0.47871253],
       [0.478332  ],
       [0.47871232],
       [0.47871232],
       [0.472724  ],
       [0.47871232],
       [0.47278473],
       [0.35598496],
       [0.47871232],
       [0.356725  ],
       [0.47776252],
       [0.4787124 ],
       [0.46966943],
       [0.47870806],
       [0.47871232],
       [0.47871232],
       [0.47720683],
       [0.47871232],
       [0.478