In [1]:
import numpy as np
import MyNeuralNet
from lr_utils import load_dataset
from sklearn.metrics import accuracy_score

In [2]:
# Load the train/test image arrays and their label arrays in a single call.
# `classes` is returned alongside them (presumably the label names; it is not
# used in the cells below). The four array shapes are printed in the next cell.
train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()

In [3]:
# Capture each array's shape once so the report below and any later cell
# read the same stored values instead of re-querying `.shape`.
train_x_orig_shape = train_x_orig.shape
train_y_shape = train_y.shape
test_x_orig_shape = test_x_orig.shape
test_y_shape = test_y.shape

# One line per array; the text (including the space before each newline)
# matches the output recorded for this cell.
print(
    f"train_x_orig shape: {train_x_orig_shape} \n"
    f"train_y shape: {train_y_shape} \n"
    f"test_x_orig shape: {test_x_orig_shape} \n"
    f"test_y shape: {test_y_shape}"
)

train_x_orig shape: (209, 64, 64, 3) 
train_y shape: (1, 209) 
test_x_orig shape: (50, 64, 64, 3) 
test_y shape: (1, 50)


In [4]:
# Flatten every image into one long vector, then transpose so that each
# COLUMN holds one image: (m, 64, 64, 3) -> (m, 12288) -> (12288, m).
train_x_orig_flatten = train_x_orig.reshape((train_x_orig.shape[0], -1)).transpose()
test_x_orig_flatten = test_x_orig.reshape((test_x_orig.shape[0], -1)).transpose()

print(f"shape of train_x_orig_flatten = {train_x_orig_flatten.shape}, \nshape of test_x_orig_flatten = {test_x_orig_flatten.shape}")

shape of train_x_orig_flatten = (12288, 209), 
shape of test_x_orig_flatten = (12288, 50)


In [5]:
# Rescale the pixel values by the fixed constant 255 (assumes 8-bit images, so
# the result lies in [0, 1]). Nothing is fitted here: the same constant is
# applied to the training set and the test set.
train_X = train_x_orig_flatten / 255.0
test_X = test_x_orig_flatten / 255.0

print(f"Shape of train_X: {train_X.shape} \nShape of test_X: {test_X.shape}")

Shape of train_X: (12288, 209) 
Shape of test_X: (12288, 50)


In [6]:
# Let us build two NNs, a 2-layer NN and an L-layer NN, and compare their performance.
def neural_network(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    """Train a ``MyNeuralNet.NeuralNet`` with plain gradient descent.

    Every iteration runs forward propagation, computes the cost, runs backward
    propagation and applies one parameter update.

    Args:
        X: Training inputs handed to ``forward_propagation``. In this notebook
            it is ``train_X`` with shape (12288, 209), one column per image.
        Y: Training labels handed to ``compute_cost`` and
            ``backward_propagation``. In this notebook it is ``train_y`` with
            shape (1, 209).
        layers_dims: Architecture description passed to the ``NeuralNet``
            constructor (presumably the width of each layer, input first).
        learning_rate: Step size forwarded to ``update_parameters``.
        num_iterations: Number of gradient-descent iterations to run.
        print_cost: When truthy, print the cost at every checkpoint.

    Returns:
        tuple: ``(costs, NN)`` where ``costs`` holds the cost at each
        checkpoint (every 500th iteration and the final one) and ``NN`` is the
        trained network instance.

    Note:
        The global NumPy random generator is reseeded with 1 on every call so
        that repeated runs give the same result.
    """
    np.random.seed(1)  # Define np seed for reproducible results

    # Create the NN instance, which initializes its parameters.
    net = MyNeuralNet.NeuralNet(layers_dims)

    costs = []  # Cost at each checkpoint, kept for plotting

    for i in range(num_iterations):
        # Forward pass: prediction plus the per-layer caches needed by backprop.
        AL, caches = net.forward_propagation(X)

        # Cost of the current prediction.
        cost = net.compute_cost(AL, Y)

        # Backward pass, initialized with AL and Y.
        grads = net.backward_propagation(AL, Y, caches)

        # Gradient-descent step.
        net.update_parameters(grads, learning_rate)

        # Checkpoint every 500 iterations and on the last one. The cost is
        # always recorded; print_cost only controls whether it is also printed,
        # so the returned history is usable even for silent runs.
        if i % 500 == 0 or i == num_iterations - 1:
            costs.append(cost)
            if print_cost:
                print(f"Cost after iteration {i}: {cost}")

    return costs, net

# Architecture for the "2 layer" network. The first entry, 12288, matches the
# number of rows in train_X; the remaining entries are presumably the hidden
# and output layer widths.
layers_dims = [12288, 7, 1]
costs, two_layer_NN = neural_network(
    train_X,
    train_y,
    layers_dims,
    print_cost=True,
)

Cost after iteration 0: 0.7074426632736273
Cost after iteration 500: 0.3336897138438556
Cost after iteration 1000: 0.12412486168658173
Cost after iteration 1500: 0.05362667055856123
Cost after iteration 2000: 0.03330274414524972
Cost after iteration 2500: 0.02306424025583977
Cost after iteration 2999: 0.016151946179173104


In [7]:
# Predictions for train and test with the two layer NN.
predictions_train = two_layer_NN.predict(train_X)
predictions_test = two_layer_NN.predict(test_X)

# Take the first row of each array to make it 1D, then score the 2 layer NN.
# accuracy_score is declared as accuracy_score(y_true, y_pred), so the ground
# truth labels go first and the predictions second.
accuracy_train = accuracy_score(train_y[0], predictions_train[0])
accuracy_test = accuracy_score(test_y[0], predictions_test[0])
print(f"2 layer NN: \nAccuracy_train: {100 * accuracy_train:.2f}% \nAccuracy_test: {100 * accuracy_test:.2f}%")

2 layer NN: 
Accuracy_train: 100.00% 
Accuracy_test: 74.00%


In [8]:
# Architecture for the 4 layer NN: 12288 matches the number of rows in
# train_X; the remaining entries are presumably the hidden and output widths.
layers_dims = [12288, 20, 7, 5, 1]

# Train with the same defaults as the two layer run so the results are comparable.
costs, four_layer_NN = neural_network(
    train_X,
    train_y,
    layers_dims,
    print_cost=True,
)

Cost after iteration 0: 1.208125010600323
Cost after iteration 500: 0.647319447526937
Cost after iteration 1000: 0.6374058448197395
Cost after iteration 1500: 0.6243486119913269
Cost after iteration 2000: 0.5391408405511141
Cost after iteration 2500: 0.2998273840124648
Cost after iteration 2999: 0.08237974441376025


In [9]:
# Predictions for train and test with the four layer NN.
predictions_train = four_layer_NN.predict(train_X)
predictions_test = four_layer_NN.predict(test_X)

# Take the first row of each array to make it 1D, then score the 4 layer NN.
# accuracy_score is declared as accuracy_score(y_true, y_pred), so the ground
# truth labels go first and the predictions second.
accuracy_train = accuracy_score(train_y[0], predictions_train[0])
accuracy_test = accuracy_score(test_y[0], predictions_test[0])

# The layer count in the heading is read from the global `layers_dims`, so it
# reflects whichever architecture cell was executed most recently.
print(f"{len(layers_dims) - 1} layer NN: \nAccuracy_train: {100 * accuracy_train:.2f}% \nAccuracy_test: {100 * accuracy_test:.2f}%")

# We can observe a large gap between our training and test set accuracy (100% vs. 80%). This means our model
# currently has high variance. To combat this, we can gather more data or apply regularization.

4 layer NN: 
Accuracy_train: 100.00% 
Accuracy_test: 80.00%


In [10]:
def neural_network_reg(X, Y, layers_dims, lamda=0, keep_prob=1, learning_rate=0.0075, num_iterations=3000, print_cost=False, reg=False):
    """Train a ``MyNeuralNet.NeuralNet`` using its dropout / regularized code paths.

    Every iteration runs ``forward_propagation_with_dropout``, computes the
    cost with ``compute_cost_reg``, runs ``backward_propagation_with_dropout``
    and applies one parameter update.

    Args:
        X: Training inputs handed to the forward pass. In this notebook it is
            ``train_X`` with shape (12288, 209), one column per image.
        Y: Training labels handed to the cost and the backward pass. In this
            notebook it is ``train_y`` with shape (1, 209).
        layers_dims: Architecture description passed to the ``NeuralNet``
            constructor (presumably the width of each layer, input first).
        lamda: Regularization strength, forwarded to ``compute_cost_reg`` and
            to the backward pass. It is passed to the cost regardless of
            ``reg``.
        keep_prob: Dropout keep probability, forwarded to both the forward and
            the backward pass.
        learning_rate: Step size forwarded to ``update_parameters``.
        num_iterations: Number of gradient-descent iterations to run.
        print_cost: When truthy, print the cost at every checkpoint.
        reg: Flag forwarded unchanged to ``backward_propagation_with_dropout``
            (presumably enables the L2 term in the gradients; confirm in
            ``MyNeuralNet``).

    Returns:
        tuple: ``(costs, NN)`` where ``costs`` holds the cost at each
        checkpoint (every 500th iteration and the final one) and ``NN`` is the
        trained network instance.

    Note:
        The global NumPy random generator is reseeded with 1 on every call so
        that repeated runs give the same result.
    """
    np.random.seed(1)  # Define np seed for reproducible results

    # Create the NN instance, which initializes its parameters.
    net = MyNeuralNet.NeuralNet(layers_dims)

    costs = []  # Cost at each checkpoint, kept for plotting

    for i in range(num_iterations):
        # Forward pass with dropout: prediction plus the per-layer caches.
        AL, caches = net.forward_propagation_with_dropout(X, keep_prob)

        # Regularized cost of the current prediction.
        cost = net.compute_cost_reg(AL, Y, lamda)

        # Backward pass, initialized with AL and Y.
        grads = net.backward_propagation_with_dropout(AL, Y, caches, keep_prob, lamda, reg)

        # Gradient-descent step.
        net.update_parameters(grads, learning_rate)

        # Checkpoint every 500 iterations and on the last one. The cost is
        # always recorded; print_cost only controls whether it is also printed,
        # so the returned history is usable even for silent runs.
        if i % 500 == 0 or i == num_iterations - 1:
            costs.append(cost)
            if print_cost:
                print(f"Cost after iteration {i}: {cost}")

    return costs, net

In [19]:
# Same 4 layer architecture as the unregularized run, so the two are comparable.
layers_dims = [12288, 20, 7, 5, 1]

# Train again, this time with lamda=0.4 and dropout keep_prob=0.9.
costs, four_layer_NN_reg = neural_network_reg(
    train_X,
    train_y,
    layers_dims,
    lamda=0.4,
    keep_prob=0.9,
    print_cost=True,
    reg=True,
)

Cost after iteration 0: 1.3363956407504674
Cost after iteration 500: 0.7133243064748825
Cost after iteration 1000: 0.6114794757911202
Cost after iteration 1500: 0.3263759060098015
Cost after iteration 2000: 0.21516703390282282
Cost after iteration 2500: 0.1541758841485032
Cost after iteration 2999: 0.15980388508192295


In [20]:
# Predictions for train and test with the regularized four layer NN.
predictions_train = four_layer_NN_reg.predict(train_X)
predictions_test = four_layer_NN_reg.predict(test_X)

# Take the first row of each array to make it 1D, then score the 4 layer NN.
# accuracy_score is declared as accuracy_score(y_true, y_pred), so the ground
# truth labels go first and the predictions second.
accuracy_train = accuracy_score(train_y[0], predictions_train[0])
accuracy_test = accuracy_score(test_y[0], predictions_test[0])

# The layer count in the heading is read from the global `layers_dims`, so it
# reflects whichever architecture cell was executed most recently.
print(f"{len(layers_dims) - 1} layer NN: \nAccuracy_train: {100 * accuracy_train:.2f}% \nAccuracy_test: {100 * accuracy_test:.2f}%")

4 layer NN: 
Accuracy_train: 100.00% 
Accuracy_test: 84.00%


In [16]:
# We can see that using dropout and L2 regularization gave a slight improvement in test accuracy (80% -> 84%).
# To improve further, we could run a hyperparameter search over the learning rate, keep_prob, and lamda
# to find a better configuration of these parameters. We can also see that at iteration 2999 the cost has increased
# slightly. This could mean our learning rate is too large late in training (it could also just be noise from the
# random dropout masks); to combat this we could implement learning rate decay or use the Adam optimizer, which
# adapts the learning rate during training.

# In this notebook I learned how to implement He initialization and why it is important when using
# ReLU as the primary activation function. I also learned how to generalize my code to support
# deeper NNs, and how to implement L2 and dropout regularization
# and how they can help reduce overfitting.