In [22]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import style
style.use('Solarize_Light2')

# Preparation of Data

## Loading the data

In [19]:
# Read each HDF5 file inside a `with` block so the file handle is closed as soon
# as its arrays have been copied into memory (np.array(...) makes the copy).
with h5py.File('datasets/train_cat.h5', "r") as train_dataset:
    train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # train set labels

with h5py.File('datasets/test_cat.h5', "r") as test_dataset:
    test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # test set labels

# Reshape the label arrays to row vectors of shape (1, m)
train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

classes = np.array(['not-cat', 'cat']) # the list of classes

m_train = train_set_x_orig.shape[0]   # number of training examples
m_test = test_set_x_orig.shape[0]     # number of test examples
num_px = train_set_x_orig[0].shape[0] # size of the first axis of a single example

# Flatten every example into one row: (m, ...) -> (m, n_features)
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1)
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1)

# Transpose so that each column is one example: (n_features, m)
train_set_x_flatten = train_set_x_flatten.T
test_set_x_flatten = test_set_x_flatten.T

# Normalize: divide by 255 (the raw values are assumed to lie in 0..255)
train_set_x = train_set_x_flatten/255.
test_set_x = test_set_x_flatten/255.

In [21]:
# Report the shape of every array that is fed to the model.
for label, array in (
    ("train_set_x shape ->", train_set_x),
    ("train_set_y_orig shape ->", train_set_y_orig),
    ("test_set_x shape ->", test_set_x),
    ("test_set_y_orig shape ->", test_set_y_orig),
):
    print(label, array.shape)

train_set_x shape -> (12288, 209)
train_set_y_orig shape -> (1, 209)
test_set_x shape -> (12288, 50)
test_set_y_orig shape -> (1, 50)


# One Hidden Layer

<img src="OneHiddenLayer.png" style="width:650px;height:400px;">

## Defining the neural network structure 
    - n_0: the size of the input layer (n_x)
    - n_1: the size of the hidden layer
    - n_2: the size of the output layer (n_y)

### Getting the dimensions of Layers

In [43]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), applied element-wise.

    Args:
        z: a scalar or NumPy array.

    Returns:
        The sigmoid of `z`, with the same shape as `z`.
    """
    decay = np.exp(-z)
    return 1. / (1. + decay)

In [26]:
def layer_sizes(X, Y, hidden_layer=4):
    """Return the layer widths ``(n_0, n_1, n_2)`` of the network.

    Args:
        X: input array; its first dimension is the input layer size.
        Y: label array; its first dimension is the output layer size.
        hidden_layer: number of hidden units (default 4).

    Returns:
        Tuple ``(n_0, n_1, n_2)`` = (rows of X, hidden_layer, rows of Y).
    """
    input_size, output_size = X.shape[0], Y.shape[0]
    return (input_size, hidden_layer, output_size)

In [27]:
def initialize_parameters(n_0, n_1, n_2):
    """Create the initial weights and biases of the two layers.

    Weights are drawn from ``np.random.randn`` and scaled by 0.01; biases
    start at zero.

    Args:
        n_0: size of the input layer.
        n_1: size of the hidden layer.
        n_2: size of the output layer.

    Returns:
        Dict with "W1" (n_1, n_0), "b1" (n_1, 1), "W2" (n_2, n_1), "b2" (n_2, 1).
    """
    scale = 0.01

    # The dict literal is evaluated top to bottom, so W1 is drawn before W2;
    # that order determines the values obtained under a fixed random seed.
    parameters = {
        "W1": np.random.randn(n_1, n_0) * scale,
        "b1": np.zeros((n_1, 1)),
        "W2": np.random.randn(n_2, n_1) * scale,
        "b2": np.zeros((n_2, 1)),
    }

    # Sanity-check every array against the shape it is supposed to have.
    expected_shapes = {
        "W1": (n_1, n_0),
        "b1": (n_1, 1),
        "W2": (n_2, n_1),
        "b2": (n_2, 1),
    }
    for key, shape in expected_shapes.items():
        assert parameters[key].shape == shape

    return parameters

In [28]:
def forward_propagation(X, parameters):
    """Run one forward pass: tanh hidden layer followed by a sigmoid output.

    Args:
        X: input array with one example per column, shape (n_0, m).
        parameters: dict holding "W1" (n_1, n_0), "b1" (n_1, 1),
            "W2" (n_2, n_1) and "b2" (n_2, 1).

    Returns:
        Tuple ``(A2, cache)`` where A2 is the output activation of shape
        (1, m) and cache is a dict with the intermediates "Z1", "A1", "Z2", "A2".
    """
    W1, b1, W2, b2 = (parameters[key] for key in ("W1", "b1", "W2", "b2"))

    # Hidden layer: affine map, then tanh.
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)

    # Output layer: affine map, then sigmoid.
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    # There must be exactly one output per input example.
    n_examples = X.shape[1]
    assert A2.shape == (1, n_examples)

    return A2, dict(Z1=Z1, A1=A1, Z2=Z2, A2=A2)

In [29]:
def compute_cost(A2, Y, parameters):
    """Compute the binary cross-entropy cost averaged over all examples.

    Args:
        A2: predicted probabilities, same shape as `Y`.
        Y: 0/1 labels with one example per column; the number of examples
            is read from ``Y.shape[1]``.
        parameters: unused; kept so existing call sites keep working.

    Returns:
        The scalar cost ``-(1/m) * sum(Y*log(A2) + (1-Y)*log(1-A2))``.
    """
    m = Y.shape[1] # number of examples

    # Keep the probabilities strictly inside (0, 1). Without this, a saturated
    # output (exactly 0 or 1) makes np.log return -inf and the cost turns into
    # NaN (0 * -inf) or inf.
    eps = np.finfo(np.float64).eps
    probs = np.clip(np.asarray(A2, dtype=np.float64), eps, 1.0 - eps)

    logprobs = np.multiply(Y, np.log(probs)) + np.multiply(1 - Y, np.log(1 - probs))
    cost = -np.sum(logprobs) / m

    return np.squeeze(cost)

In [45]:
def backward_propagation(parameters, cache, X, Y):
    """Compute the gradients of the cost with respect to all parameters.

    Args:
        parameters: dict of weights; only "W2" is read here.
        cache: dict of forward-pass values; "A1" (hidden activation, tanh of
            Z1) and "A2" (output activation) are read.
        X: input array with one example per column, shape (n_0, m).
        Y: labels, shape (1, m).

    Returns:
        Dict with "dW1" (n_1, n_0), "db1" (n_1, 1), "dW2" (n_2, n_1)
        and "db2" (n_2, 1).
    """
    m = X.shape[1]

    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]

    dZ2 = A2 - Y                                 # shape (1, m)
    dW2 = np.dot(dZ2, A1.T) / m                  # shape (n_2, n_1)
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m # shape (n_2, 1)

    # tanh'(Z1) = 1 - tanh(Z1)^2 = 1 - A1^2: reuse the cached activation
    # instead of evaluating tanh over the whole (n_1, m) matrix again.
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2)) # shape (n_1, m)
    dW1 = np.dot(dZ1, X.T) / m                   # shape (n_1, n_0)
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m # shape (n_1, 1)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads

In [51]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step: ``theta <- theta - learning_rate * dtheta``.

    Args:
        parameters: dict with "W1", "b1", "W2", "b2".
        grads: dict with the matching gradients "dW1", "db1", "dW2", "db2".
        learning_rate: step size.

    Returns:
        A new dict with the updated "W1", "b1", "W2", "b2"; the input dict
        is not modified.
    """
    return {
        name: parameters[name] - learning_rate*grads["d" + name]
        for name in ("W1", "b1", "W2", "b2")
    }

In [77]:
def nn_model(X, Y, n_1, num_iterations=10000, learning_rate=0.1, print_cost=False, print_test=False, X_test=None, y_test=None, print_every=1000):
    """Train the one-hidden-layer network with batch gradient descent.

    Args:
        X: training inputs, one example per column, shape (n_0, m).
        Y: training labels, shape (1, m).
        n_1: number of hidden units.
        num_iterations: number of gradient-descent steps.
        learning_rate: step size of each update.
        print_cost: if True, print the cost every `print_every` iterations.
        print_test: if True (and `print_cost` is True), also print the
            accuracy on (`X_test`, `y_test`) on each report.
        X_test: test inputs, required when test accuracy is reported.
        y_test: test labels, required when test accuracy is reported.
        print_every: reporting interval in iterations (default 1000).

    Returns:
        Dict with the learned "W1", "b1", "W2", "b2".

    Raises:
        ValueError: if `print_every` is smaller than 1, or if a test-accuracy
            report would be produced but `X_test` or `y_test` is missing.
    """
    if print_every < 1:
        raise ValueError("print_every must be a positive integer")

    # Fail up front instead of deep inside the training loop when the first
    # report would need test data that was not supplied.
    will_report_test = print_cost and print_test and num_iterations >= print_every
    if will_report_test and (X_test is None or y_test is None):
        raise ValueError("print_test=True requires both X_test and y_test")

    # Fixed seed so that the initial weights are the same on every run.
    np.random.seed(3)
    n_0, _, n_2 = layer_sizes(X, Y)

    parameters = initialize_parameters(n_0, n_1, n_2)

    for i in range(1, num_iterations+1):

        A2, cache = forward_propagation(X, parameters)

        cost = compute_cost(A2, Y, parameters)

        grads = backward_propagation(parameters, cache, X, Y)

        parameters = update_parameters(parameters, grads, learning_rate)

        # Report progress every `print_every` iterations. The cost is the one
        # computed before this iteration's update; the test accuracy uses the
        # updated parameters.
        if print_cost and i % print_every == 0:
            message = f"Cost after iteration {i}: {cost}"
            if print_test:
                test_predictions = predict(parameters, X_test)
                test_acc = 100 - np.mean(np.abs(test_predictions - y_test)) * 100
                message += f"  |  test accuracy: {test_acc}"
            # Always end the report with a newline, also without test accuracy.
            print(message)

    return parameters

In [78]:
def predict(parameters, X):
    """Predict a 0/1 class for every column of X.

    Args:
        parameters: dict with "W1", "b1", "W2", "b2" as used by
            `forward_propagation`.
        X: input array with one example per column, shape (n_0, m).

    Returns:
        Integer array of shape (1, m): 0 where the predicted probability is
        <= 0.5, otherwise 1.
    """
    A2, _ = forward_propagation(X, parameters)

    # Vectorized threshold on the first output row. The `<= 0.5 -> 0, else 1`
    # form keeps the tie-breaking of the element-wise version: exactly 0.5
    # maps to class 0.
    predictions = np.where(A2[:1, :] <= 0.5, 0, 1)

    return predictions

In [79]:
# Train with 8 hidden units, reporting the cost and the test accuracy as it goes.
parameters = nn_model(
    X=train_set_x,
    Y=train_set_y_orig,
    n_1=8,
    num_iterations=10000,
    learning_rate=0.01,
    print_cost=True,
    print_test=True,
    X_test=test_set_x,
    y_test=test_set_y_orig,
)

Cost after iteration 500: 0.4885436388436988  |  test accuracy: 56.0
Cost after iteration 1000: 0.27278126505608613  |  test accuracy: 60.0
Cost after iteration 1500: 0.09879382681604416  |  test accuracy: 70.0
Cost after iteration 2000: 0.054082137251847365  |  test accuracy: 70.0
Cost after iteration 2500: 0.03551886899902286  |  test accuracy: 68.0
Cost after iteration 3000: 0.025402582197411767  |  test accuracy: 66.0
Cost after iteration 3500: 0.019334763424478  |  test accuracy: 66.0
Cost after iteration 4000: 0.015211479043025921  |  test accuracy: 68.0
Cost after iteration 4500: 0.012575217108374597  |  test accuracy: 68.0
Cost after iteration 5000: 0.01073960318548639  |  test accuracy: 68.0
Cost after iteration 5500: 0.009363938427583126  |  test accuracy: 68.0
Cost after iteration 6000: 0.00829341398195627  |  test accuracy: 68.0
Cost after iteration 6500: 0.007437269064112263  |  test accuracy: 68.0
Cost after iteration 7000: 0.0067376116733104995  |  test accuracy: 68.0
Co

In [72]:
# Class predictions of the trained model on both splits.
train_predictions, test_predictions = (
    predict(parameters, inputs) for inputs in (train_set_x, test_set_x)
)

In [73]:
# Accuracy in percent: 100 minus the mean absolute error between the 0/1
# predictions and the 0/1 labels, scaled by 100.
train_acc, test_acc = (
    100 - np.mean(np.abs(predicted - actual)) * 100
    for predicted, actual in (
        (train_predictions, train_set_y_orig),
        (test_predictions, test_set_y_orig),
    )
)
print(f"train accuracy: {train_acc}")
print(f"test accuracy: {test_acc}")

train accuracy: 100.0
test accuracy: 68.0
