In [None]:
import numpy
import plotly.offline
import scipy
import pandas
import matplotlib.pyplot as plt
import sklearn
from keras.models import Sequential
from keras.layers import Dense
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

## Neural Networks

This week, we'll predict the digits in an image using a neural network. Let's check our dataset:

In [None]:
from keras.datasets import mnist

# load_data() hands back two (images, labels) pairs: the train and the test split
mnist_train, mnist_test = mnist.load_data()
train_X, train_Y = mnist_train
test_X, test_Y = mnist_test
# shape of the training images, shown as the cell output
train_X.shape

We can see that there are 60,000 square images in our training set. Let's plot a few images from the training set and check an example input:

In [None]:
# indices of the training images to draw; 433 is the example inspected below
plot_inds = [2, 433, 433]
n_plots = len(plot_inds)

# first figure: images drawn without an explicit colormap
plt.figure()
for ind, train_ind in enumerate(plot_inds):
    # subplot(n_rows, n_cols, draw_index): draw_index is 1-based,
    # e.g. (1, 3, 2) is the 2nd panel on a 1x3 plot grid
    plt.subplot(1, n_plots, ind + 1)
    plt.imshow(train_X[train_ind])

# second figure: we need to specify the color mapping to draw the images correctly
plt.figure()
for ind, train_ind in enumerate(plot_inds):
    plt.subplot(1, n_plots, ind + 1)
    plt.imshow(train_X[train_ind], cmap='gray')

# as an example, 14th row of an input
display(train_X[433, 13])
print(f'Max value: {train_X.max()}, Min value: {train_X.min()}')
# let's see what value Y takes for this X:
print(f'Y: {train_Y[433]}')

As we can see, inputs take values between 0 and 255, each corresponding to the level of whiteness of a pixel, and the Y values are simply the digits themselves. We should therefore divide X by 255 to scale it to the range [0, 1] and convert Y to a one-hot encoding.

In [None]:
# scale the pixel values by the largest possible intensity (255)
train_X_scaled = train_X / 255
test_X_scaled = test_X / 255
# one-hot encode the labels: one column per distinct label value.
# dtype is given explicitly because recent pandas versions produce boolean
# dummies by default, whereas the network is trained on floating-point targets.
train_Y01 = pandas.get_dummies(train_Y, dtype='float32').to_numpy()
test_Y01 = pandas.get_dummies(test_Y, dtype='float32').to_numpy()

Let's start by building a simple neural network:

In [None]:
# number of inputs per image once the pixel grid is flattened into one row
n_inputs = int(numpy.prod(train_X.shape[1:]))
# one output per column of the one-hot encoded labels
n_classes = train_Y01.shape[1]

model = Sequential()
# single layer with one output per class. The classes are mutually exclusive,
# so the outputs go through a softmax (they sum to 1) instead of independent
# sigmoids.
model.add(Dense(n_classes, input_dim=n_inputs, activation='softmax'))
# we can include metrics to keep track of while training the model.
# categorical_crossentropy is the loss matching softmax + one-hot targets; with
# it, 'accuracy' is the share of images whose highest-scoring class is the true
# one (with binary_crossentropy some Keras versions report per-output binary
# accuracy instead, which is inflated for one-hot targets).
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
# training
model.fit(
    train_X_scaled.reshape(train_X.shape[0], n_inputs),
    train_Y01, epochs=5, batch_size=10)

Making predictions on test set using the trained model:

In [None]:
# every test image flattened into a single row of pixel values
flat_size = numpy.prod(train_X.shape[1:])
test_X_flat = test_X_scaled.reshape(test_X.shape[0], flat_size)

# loss and accuracy of the trained model over the whole test set
loss, accuracy = model.evaluate(test_X_flat, test_Y01)
print(f'Accuracy: {accuracy*100:.2f}%')

# prediction for one test image: the output with the highest score wins
test_row_id = 433
actual_value = test_Y[test_row_id]
X_values = test_X_scaled[test_row_id]
class_scores = model.predict(X_values.reshape(1, flat_size))
predicted_outcome = class_scores.argmax()

# draw the image with the predicted digit as its title
plt.figure()
plt.title(f'Model prediction for this digit: {predicted_outcome}')
plt.imshow(X_values, cmap='gray')

Accuracy isn't too good for this type of task, so let's add more layers, train for more epochs, and see what happens:

In [None]:
# number of inputs per image once the pixel grid is flattened into one row
n_inputs = int(numpy.prod(train_X.shape[1:]))
# one output per column of the one-hot encoded labels
n_classes = train_Y01.shape[1]

model = Sequential()
# seven hidden layers of 50 units each, alternating sigmoid and relu
# activations: the first (sigmoid) layer declares the input size, the
# remaining six are added in relu/sigmoid pairs
model.add(Dense(50, input_dim=n_inputs, activation='sigmoid'))
for activation in ['relu', 'sigmoid'] * 3:
    model.add(Dense(50, activation=activation))
# last layer: one softmax output per class, since the classes are mutually
# exclusive
model.add(Dense(n_classes, activation='softmax'))
# we can include metrics to keep track of while training the model.
# categorical_crossentropy is the loss matching softmax + one-hot targets; with
# it, 'accuracy' is the share of images whose highest-scoring class is the true
# one (with binary_crossentropy some Keras versions report per-output binary
# accuracy instead, which is inflated for one-hot targets).
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
# training
model.fit(
    train_X_scaled.reshape(train_X.shape[0], n_inputs),
    train_Y01, epochs=30, batch_size=20)

Notice how accuracy initially improves more slowly than it did for the simple network. Let's calculate the accuracy on the test set:

In [None]:
# length of one image after flattening its pixel grid
pixels_per_image = numpy.prod(train_X.shape[1:])

# test-set loss and accuracy of the deeper network
test_inputs = test_X_scaled.reshape(test_X.shape[0], pixels_per_image)
loss, accuracy = model.evaluate(test_inputs, test_Y01)
print(f'Accuracy: {accuracy*100:.2f}%')

# pick one test image and ask the model which digit it shows
test_row_id = 433
actual_value = test_Y[test_row_id]
X_values = test_X_scaled[test_row_id]
single_input = X_values.reshape(1, pixels_per_image)
# index of the highest-scoring output is the predicted digit
predicted_outcome = model.predict(single_input).argmax()

# show the image together with the prediction
plt.figure()
plt.title(f'Model prediction for this digit: {predicted_outcome}')
plt.imshow(X_values, cmap='gray')

We can see that accuracy has improved! Now let's train a neural network on the Titanic dataset.

In [None]:
from sklearn.model_selection import train_test_split

titanic_df = pandas.read_csv('titanic_train.csv')
# drop the columns that are not used as features, then every row that still
# has a missing value
columns_to_drop = ['PassengerId', 'Name', 'Ticket', 'Fare', 'Cabin']
titanic_df = titanic_df.drop(columns=columns_to_drop).dropna()
# a bare expression in the middle of a cell is not rendered, so display it
# explicitly
display(titanic_df.head())
# X and y values:
X_ = titanic_df.drop(columns=['Survived'])
y = titanic_df['Survived'].to_numpy()
# one-hot encode the non-numeric columns. The float dtype is requested
# explicitly: recent pandas versions create boolean dummy columns, and mixing
# them with the numeric columns would give an object array that cannot be fed
# to the network.
X = pandas.get_dummies(X_, dtype=float).to_numpy(dtype=float)
train_X, test_X, train_Y01, test_Y01 = train_test_split(X, y, test_size=0.33,
                                                    random_state=433)
# scaling X by the per-column maximum of the training split (the test split
# reuses the same factors so both are on the same scale)
max_values = train_X.max(axis=0)
# a column whose training maximum is 0 would cause a division by zero;
# dividing by 1 leaves such a column unchanged
max_values[max_values == 0] = 1
train_X_scaled = train_X / max_values
test_X_scaled = test_X / max_values

Using a simple network:

In [None]:
# number of feature columns fed into the network
n_features = numpy.prod(train_X.shape[1:])
train_inputs = train_X_scaled.reshape(train_X.shape[0], n_features)

# a single sigmoid unit: the whole network is one layer with one output
model = Sequential()
output_layer = Dense(1, input_dim=n_features, activation='sigmoid')
model.add(output_layer)
# accuracy is tracked alongside the loss while the model trains
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
# fit on the scaled training split
model.fit(train_inputs, train_Y01, batch_size=20, epochs=30)

Accuracy on test:

In [None]:
# number of feature columns per passenger
n_features = int(numpy.prod(train_X.shape[1:]))

# accuracy over the test set
loss, accuracy = model.evaluate(
    test_X_scaled.reshape(test_X.shape[0], n_features),
    test_Y01)
print(f'Accuracy: {accuracy*100:.2f}%')

# predicting a single row:
test_row_id = 200
# the labels of the Titanic split are in test_Y01; test_Y still holds the MNIST
# digit labels loaded earlier, so it must not be used here
actual_value = test_Y01[test_row_id]
X_values = test_X_scaled[test_row_id]
# the model has a single sigmoid output, read as "survived" above 0.5
predicted_outcome = model.predict(
    X_values.reshape(1, n_features))[0, 0]
print(
    f'Row: {test_row_id}, '
    f'Actual: {"Survived" if actual_value else "Did not survive"}'
    f', Predicted: {"Survived" if predicted_outcome > 0.5 else "Did not survive"}')

# predictions over the whole test set (computed once and reused below):
X_values = test_X_scaled
actual_values = test_Y01
predicted_values = model.predict(
    X_values.reshape(X_values.shape[0], n_features))[:, 0]
data = [
    ('Yes' if survived else 'No',
     'Yes' if pred > 0.5 else 'No')
    for pred, survived in zip(predicted_values, actual_values)
]
results_df = pandas.DataFrame(data, columns=['Actual', 'Predicted'])
# raw model outputs for every test row, shown as the cell output
predicted_values