In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import math
from keras.utils import to_categorical
import tensorflow as tf
import pandas as pd
import math
import warnings
warnings.filterwarnings("default")

In [2]:
def dimension_calculator(orig_dims, layers, layer_dims):
    """Compute the size produced by each Conv/Pool/Flatten layer.

    Parameters
    ----------
    orig_dims : 3-sequence; the first entry is used as the input spatial
        size and the last as the input depth.
    layers : sequence of layer-type strings ('Conv', 'Pool', 'Flatten', ...).
    layer_dims : per-layer settings: ``(f, stride, n_filters, pad)`` for Conv,
        ``(f, stride, mode)`` for Pool and the channel count for Flatten.

    Returns
    -------
    dict mapping ``str(layer_number)`` to a size. Key ``'0'`` holds the input
    depth; key ``str(k + 1)`` holds the output size of ``layers[k]``. Layer
    types other than the three above (past the first layer) get no entry.
    """
    def _output_size(size_in, f, s, p=0):
        # Standard convolution/pooling output-size formula.
        return math.floor(((size_in + 2 * p - f) / (s)) + 1)

    input_size, _, input_depth = orig_dims
    di = {'0': input_depth}
    for idx, kind in enumerate(layers):
        in_key, out_key = str(idx), str(idx + 1)
        if idx == 0:
            # The first layer is always unpacked as a convolution spec.
            f, s, _, p = layer_dims[idx]
            di[out_key] = _output_size(input_size, f, s, p)
        elif kind == 'Pool':
            f, s, _ = layer_dims[idx]
            di[out_key] = _output_size(di[in_key], f, s)
        elif kind == 'Conv':
            f, s, _, p = layer_dims[idx]
            di[out_key] = _output_size(di[in_key], f, s, p)
        elif kind == 'Flatten':
            channels = layer_dims[idx]
            di[out_key] = di[in_key] * di[in_key] * channels
    return di

In [3]:
def initialize_parameters_deep(i, layer_dims, activations, initialization, Adam, n_prev = 0):
    """Create the weights, bias and Adam buffers for one dense layer.

    Parameters
    ----------
    i : index into ``layer_dims`` of the layer being initialised.
    layer_dims : sequence whose ``i``-th entry is this layer's unit count.
    activations : sequence of activation names; ``activations[i - 1]`` selects
        the scaling factor when ``initialization`` is true.
    initialization : when ``True`` use variance scaling, otherwise scale the
        random weights by 0.01.
    Adam : when ``True`` allocate zero-filled moment buffers; otherwise the
        eight buffer slots are returned as ``None``.
    n_prev : fan-in override (e.g. the size of a preceding Flatten output).
        ``0`` means "use ``layer_dims[i - 1]``".

    Returns
    -------
    tuple ``(W, Vdw, Vdb, Sdw, Sdb, Vcdw, Vcdb, Scdw, Scdb, b)``.
    """
    n_units = layer_dims[i]
    fan_in = layer_dims[i - 1] if n_prev == 0 else n_prev

    if initialization == True:
        # NOTE(review): 'relu' gets sqrt(1/n) and everything else sqrt(2/n),
        # which looks swapped relative to the usual He/Xavier pairing.
        # Left unchanged here -- confirm the intent before altering.
        if activations[i - 1] == 'linear' or activations[i - 1] == 'relu':
            scale = np.sqrt(1 / fan_in)
        else:
            scale = np.sqrt(2 / fan_in)
    else:
        scale = 0.01

    W = np.random.randn(n_units, fan_in) * scale
    b = np.zeros((n_units, 1))

    if Adam == True:
        Vdw, Sdw, Vcdw, Scdw = (np.zeros((n_units, fan_in)) for _ in range(4))
        Vdb, Sdb, Vcdb, Scdb = (np.zeros((n_units, 1)) for _ in range(4))
    else:
        # Without Adam there is nothing to track; bind the names anyway so the
        # return statement no longer raises UnboundLocalError.
        Vdw = Sdw = Vcdw = Scdw = None
        Vdb = Sdb = Vcdb = Scdb = None

    return W, Vdw, Vdb, Sdw, Sdb, Vcdw, Vcdb, Scdw, Scdb, b

In [4]:
def initialize_parameters_conv(layers, hparameters, hparameters_prev, orig_depth = -1, Adam = True):
    """Create the filters, bias and Adam buffers for one convolution layer.

    Parameters
    ----------
    layers : unused; kept for call compatibility.
    hparameters : ``(f, stride, n_filters, pad)`` of this layer.
    hparameters_prev : 4-tuple of the previous convolution layer; its third
        entry is the number of input channels. Only read when ``orig_depth``
        is ``-1``.
    orig_depth : number of input channels when this is the first layer;
        ``-1`` means "take it from ``hparameters_prev``".
    Adam : when ``True`` (default) allocate zero-filled moment buffers,
        otherwise the eight buffer slots are returned as ``None``. The function
        previously read an undefined name ``Adam``; it is now an explicit,
        optional argument so existing four-argument calls keep working.

    Returns
    -------
    tuple ``(W, Vdw, Vdb, Sdw, Sdb, Vcdw, Vcdb, Scdw, Scdb, b)`` where ``W`` has
    shape ``(f, f, in_channels, n_filters)`` and ``b`` ``(1, 1, 1, n_filters)``.
    """
    f, s, f_no, p = hparameters
    if orig_depth == -1:
        _, _, in_channels, _ = hparameters_prev
    else:
        in_channels = orig_depth

    filter_shape = (f, f, in_channels, f_no)
    bias_shape = (1, 1, 1, f_no)

    W = np.random.randn(*filter_shape)
    b = np.zeros(bias_shape)

    if Adam == True:
        Vdw, Sdw, Vcdw, Scdw = (np.zeros(filter_shape) for _ in range(4))
        Vdb, Sdb, Vcdb, Scdb = (np.zeros(bias_shape) for _ in range(4))
    else:
        # Bind the names so the return below is always valid.
        Vdw = Sdw = Vcdw = Scdw = None
        Vdb = Sdb = Vcdb = Scdb = None

    return W, Vdw, Vdb, Sdw, Sdb, Vcdw, Vcdb, Scdw, Scdb, b

In [5]:
def initialize_parameters(orig_dims, layers, layer_dims, activations, initialization, Adam):
    """Build the parameter dictionary for every Dense and Conv layer.

    For layer number ``k`` (1-based) the dictionary receives the keys
    ``W``, ``Vdw``, ``Vdb``, ``Sdw``, ``Sdb``, ``Vcdw``, ``Vcdb``, ``Scdw``,
    ``Scdb`` and ``b``, each suffixed with ``str(k)``. Pool and Flatten layers
    contribute no entries.
    """
    slot_names = ('W', 'Vdw', 'Vdb', 'Sdw', 'Sdb', 'Vcdw', 'Vcdb', 'Scdw', 'Scdb', 'b')
    parameters = {}
    di = dimension_calculator(orig_dims, layers, layer_dims)
    _, _, orig_depth = orig_dims

    for layer_no in range(1, len(layers) + 1):
        kind = layers[layer_no - 1]
        before = layers[layer_no - 2]
        if kind == 'Dense':
            if before == 'Flatten':
                # Fan-in comes from the flattened feature-map size.
                flat_size = di[str(layer_no - 1)]
                values = initialize_parameters_deep(layer_no - 1, layer_dims, activations, initialization, Adam, flat_size)
            else:
                values = initialize_parameters_deep(layer_no - 1, layer_dims, activations, initialization, Adam)
        elif kind == 'Conv':
            own_spec = layer_dims[layer_no - 1]
            if layer_no == 1:
                # First layer: input channels come from the data itself.
                values = initialize_parameters_conv(layers, own_spec, [], orig_depth)
            elif before != 'Pool':
                values = initialize_parameters_conv(layers, own_spec, layer_dims[layer_no - 2])
            else:
                # Skip over the pooling layer to reach the filter count of
                # the layer that precedes it.
                values = initialize_parameters_conv(layers, own_spec, layer_dims[layer_no - 3])
        else:
            continue

        for slot, value in zip(slot_names, values):
            parameters[slot + str(layer_no)] = value
    return parameters

In [6]:
def zero_pad(X, pad):
    """Zero-pad the second and third axes of a 4-D array by ``pad`` on each side.

    The first and last axes are left untouched.
    """
    untouched = (0, 0)
    border = (pad, pad)
    return np.pad(X, (untouched, border, border, untouched), mode = 'constant', constant_values = (0, 0))

In [7]:
def linear_activation(A, W, b):
    """Affine step of a dense layer: returns ``W . A + b`` and its inputs.

    Returns
    -------
    (Z, cache) where ``cache`` is the tuple ``(A, W, b)``.
    """
    inputs = (A, W, b)
    pre_activation = np.dot(W, A) + b
    return pre_activation, inputs

In [8]:
def softmax(Z):
    """Column-wise softmax (normalises along axis 0).

    The per-column maximum is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps ``np.exp`` from overflowing
    to ``inf`` (and the output from becoming NaN) for large logits.

    Returns
    -------
    (A, cache) where ``cache`` is the untouched input ``Z``.
    """
    shifted = Z - np.max(Z, axis = 0, keepdims = True)
    exps = np.exp(shifted)
    A = exps / np.sum(exps, axis = 0, keepdims = True)
    cache = Z
    return A, cache

In [9]:
def sigmoid(Z):
    """Element-wise logistic function; returns ``(A, Z)``."""
    denominator = 1 + np.exp(-Z)
    return (1 / denominator), Z

In [10]:
def relu(Z):
    """Element-wise rectifier ``max(0, Z)``; returns ``(A, Z)``."""
    rectified = np.maximum(0, Z)
    return rectified, Z

In [11]:
def tanh(Z):
    """Element-wise hyperbolic tangent; returns ``(A, Z)``."""
    squashed = np.tanh(Z)
    return squashed, Z

In [12]:
def leakyrelu(Z):
    """Leaky rectifier with slope 0.01 for negative inputs; returns ``(A, Z)``."""
    leaked = 0.01 * Z
    return np.maximum(leaked, Z), Z

In [13]:
def single_slice_convolution(a_slice_prev, W, b):
    """Apply one filter to one input window and return a scalar.

    Parameters
    ----------
    a_slice_prev : window of the (padded) input, same shape as ``W``.
    W : one filter.
    b : the filter's bias; a scalar or any single-element array
        (``conv_forward`` passes a ``(1, 1, 1)`` slice).

    Returns
    -------
    ``sum(a_slice_prev * W) + b`` as a 0-d value when ``b`` holds a single
    element. Previously a ``(1, 1, 1)`` bias made the result a ``(1, 1, 1)``
    array, which callers then stored into a scalar slot -- an implicit
    conversion that NumPy has deprecated.
    """
    total = np.sum(a_slice_prev * W)
    bias = np.asarray(b, dtype = np.float64)
    if bias.size == 1:
        # Collapse the single-element bias so the result stays a scalar.
        return total + np.float64(bias.item())
    # Multi-element bias: keep the original broadcasting behaviour.
    return total + bias

In [14]:
def single_layer_forward(A_prev, W, b, activation, keep_prob):
    """Forward pass of one dense layer with optional inverted dropout.

    Parameters
    ----------
    A_prev : input activations.
    W, b : the layer's weights and bias.
    activation : one of 'sigmoid', 'relu', 'tanh', 'leakyrelu', 'softmax'
        or 'linear'.
    keep_prob : dropout keep probability; ``1`` disables dropout.

    Returns
    -------
    (A, cache). For 'linear' the cache is the linear cache alone; for every
    other activation it is ``(linear_cache, activation_cache)``. When dropout
    is active the linear cache becomes ``(D, A_prev, W, b)`` with ``D`` the
    0/1 mask that was applied.

    Raises
    ------
    ValueError : for an unsupported activation name (this previously surfaced
        as an UnboundLocalError further down).
    """
    def _apply_dropout(values, lin_cache):
        # Inverted dropout: zero units with probability 1 - keep_prob and
        # rescale the survivors by 1 / keep_prob.
        mask = np.random.rand(values.shape[0], values.shape[1])
        mask = (mask < keep_prob).astype(int)
        values = mask * values
        values /= keep_prob
        prev, weights, bias = lin_cache
        return values, (mask, prev, weights, bias)

    Z, linear_cache = linear_activation(A_prev, W, b)

    if activation == 'linear':
        if keep_prob != 1:
            Z, linear_cache = _apply_dropout(Z, linear_cache)
        return Z, linear_cache

    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
    elif activation == 'relu':
        A, activation_cache = relu(Z)
    elif activation == 'tanh':
        A, activation_cache = tanh(Z)
    elif activation == 'leakyrelu':
        A, activation_cache = leakyrelu(Z)
    elif activation == 'softmax':
        A, activation_cache = softmax(Z)
    else:
        raise ValueError('Unsupported activation: ' + repr(activation))

    if keep_prob != 1:
        A, linear_cache = _apply_dropout(A, linear_cache)
    return A, (linear_cache, activation_cache)

In [57]:
def conv_forward(A_prev, W, b, hparameters, activation):
    """Forward pass of one convolution layer.

    Parameters
    ----------
    A_prev : 4-D input, unpacked as ``(m, n_H_prev, n_W_prev, channels)``.
    W : 4-D filters, unpacked as ``(f, f, n_C_prev, n_C)``.
    b : biases, indexed as ``b[:, :, :, c]``.
    hparameters : ``(_, stride, _, pad)``.
    activation : 'relu', 'sigmoid', 'tanh' or 'linear'.

    Returns
    -------
    (A, cache) with ``cache = (A_prev, W, b, hparameters, Z)``. ``Z`` is the
    complete pre-activation array. The previous version cached only the last
    scalar pre-activation (and nothing at all for 'linear', which raised
    UnboundLocalError), so the backward pass could not build a correct
    element-wise derivative mask.

    Raises
    ------
    ValueError : for an unsupported activation name.
    """
    if activation not in ('relu', 'sigmoid', 'tanh', 'linear'):
        raise ValueError('Unsupported activation: ' + repr(activation))

    m, n_H_prev, n_W_prev, _ = A_prev.shape
    _, f, _, n_C = W.shape
    _, stride, _, pad = hparameters
    n_H = math.floor(((n_H_prev + 2 * pad - f) / (stride)) + 1)
    n_W = math.floor(((n_W_prev + 2 * pad - f) / (stride)) + 1)

    Z = np.zeros((m, n_H, n_W, n_C))
    A_prev_pad = zero_pad(A_prev, pad)
    for i in range(m):
        a_prev_pad = A_prev_pad[i]
        for h in range(n_H):
            vert_start = h * stride
            vert_end = vert_start + f
            for w in range(n_W):
                horiz_start = w * stride
                horiz_end = horiz_start + f
                # The window does not depend on the output channel, so slice
                # it once per position.
                a_prev_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end]
                for c in range(n_C):
                    Z[i, h, w, c] = single_slice_convolution(a_prev_slice, W[:,:,:,c], b[:,:,:,c])

    # Apply the non-linearity once on the whole array instead of per element.
    if activation == 'relu':
        A, _ = relu(Z)
    elif activation == 'sigmoid':
        A, _ = sigmoid(Z)
    elif activation == 'tanh':
        A, _ = tanh(Z)
    else:
        A = Z.copy()

    cache = (A_prev, W, b, hparameters, Z)
    return A, cache

In [60]:
def pool_forward(A_prev, hparameters, keep_prob):
    """Max/average pooling followed by inverted dropout.

    Parameters
    ----------
    A_prev : 4-D input, unpacked as ``(m, n_H_prev, n_W_prev, n_C)``.
    hparameters : ``(f, stride, mode)`` with ``mode`` 'max' or 'average'.
    keep_prob : dropout keep probability. A mask is always drawn, even for
        ``keep_prob == 1`` (where it is all ones).

    Returns
    -------
    (A, cache) with ``cache = (A_prev, hparameters, D)`` and ``D`` the 0/1
    dropout mask.
    """
    batch, height_in, width_in, channels = A_prev.shape
    window, step, mode = hparameters
    height_out = math.floor(((height_in - window) / (step)) + 1)
    width_out = math.floor(((width_in - window) / (step)) + 1)

    pooled = np.zeros((batch, height_out, width_out, channels))
    for i, h, w, c in np.ndindex(batch, height_out, width_out, channels):
        top = h * step
        left = w * step
        patch = A_prev[i, top:top + window, left:left + window, c]
        if mode == 'max':
            pooled[i, h, w, c] = np.max(patch)
        elif mode == 'average':
            pooled[i, h, w, c] = np.mean(patch)

    keep_mask = (np.random.rand(*pooled.shape) < keep_prob).astype(int)
    pooled = keep_mask * pooled
    pooled /= keep_prob
    return pooled, (A_prev, hparameters, keep_mask)

In [17]:
def flatten_forward(A_prev):
    """Flatten each example of a 4-D array and put examples in columns.

    Returns
    -------
    (A, cache) where ``A`` has shape ``(n_H * n_W * n_C, m)`` and ``cache`` is
    the original shape ``(m, n_H, n_W, n_C)``.
    """
    m, n_H, n_W, n_C = (A_prev.shape[axis] for axis in range(4))
    flat = A_prev.reshape(m, n_H * n_W * n_C)
    return flat.T, (m, n_H, n_W, n_C)

In [59]:
def n_layer_forward(X, layers, layer_dims, parameters, activations, keep_prob):
    """Run the forward pass through every layer in ``layers``.

    The last layer is always evaluated as a dense layer without dropout;
    every earlier layer is dispatched on its type string and uses
    ``keep_prob[index]`` where applicable.

    Returns
    -------
    (A, caches) : the network output and the list of per-layer caches, one
    entry per layer in forward order.
    """
    A = X
    caches = []
    last_no = len(layers)
    for position, kind in enumerate(layers):
        layer_no = position + 1
        A_prev = A
        if layer_no == last_no:
            # Output layer: dense, dropout disabled.
            A, cache = single_layer_forward(A_prev, parameters['W' + str(layer_no)], parameters['b' + str(layer_no)], activations[position], 1)
        elif kind == 'Dense':
            A, cache = single_layer_forward(A_prev, parameters['W' + str(layer_no)], parameters['b' + str(layer_no)], activations[position], keep_prob[position])
        elif kind == 'Conv':
            A, cache = conv_forward(A_prev, parameters['W' + str(layer_no)], parameters['b' + str(layer_no)], layer_dims[position], activations[position])
        elif kind == 'Pool':
            A, cache = pool_forward(A_prev, layer_dims[position], keep_prob[position])
        elif kind == 'Flatten':
            A, cache = flatten_forward(A_prev)
        caches.append(cache)
    return A, caches

In [19]:
def compute_cost(AL, Y, activation):
    """Cost of predictions ``AL`` against targets ``Y``.

    * 'linear'  -> half mean squared error.
    * 'softmax' -> categorical cross-entropy.
    * otherwise -> binary cross-entropy.

    The number of examples is taken from ``Y.shape[1]``; ``1e-8`` is added
    inside the logarithms to avoid ``log(0)``.
    """
    m = Y.shape[1]
    eps = 1e-8
    if activation == 'linear':
        squared_error = np.sum((AL - Y) ** 2)
        cost = (1 / (2 * m)) * (squared_error)
    elif activation == 'softmax':
        log_likelihood = np.sum(Y * np.log(AL + eps))
        cost = (- 1 / m) * (log_likelihood)
    else:
        positive_term = np.multiply(Y, np.log(AL + eps))
        negative_term = np.multiply(1 - Y, np.log(1 - AL + eps))
        cost = (- 1 / m) * (np.sum(positive_term + negative_term))
    return np.squeeze(cost)

In [20]:
def linear_backward(dZ, cache, activation, keep_prob):
    """Gradients of the affine step of a dense layer.

    ``cache`` is ``(A_prev, W, b)`` when ``keep_prob == 1`` and
    ``(D, A_prev, W, b)`` otherwise. ``activation`` is not used.

    Returns
    -------
    (dA_prev, dW, db) with ``dW`` and ``db`` averaged over ``A_prev.shape[1]``.
    """
    if keep_prob == 1:
        A_prev, W, _ = cache
    else:
        _, A_prev, W, _ = cache
    inv_m = 1 / A_prev.shape[1]
    dW = (inv_m) * np.dot(dZ, A_prev.T)
    db = (inv_m) * np.sum(dZ, axis = 1, keepdims = True)
    return np.dot(W.T, dZ), dW, db

In [21]:
def linear_derivative(dA):
    """Backward step of the identity activation: the gradient passes through."""
    return dA

In [22]:
def sigmoid_derivative(dA, cache):
    """Backward step of the sigmoid: ``dA * s * (1 - s)`` with ``s = sigmoid(cache)``."""
    s = 1 / (1 + np.exp(-cache))
    local_gradient = np.multiply(s, 1 - s)
    return dA * local_gradient

In [23]:
def relu_derivative(dA, cache):
    """Backward step of ReLU: a copy of ``dA`` zeroed wherever ``cache <= 0``."""
    blocked = cache <= 0
    dZ = np.copy(dA)
    dZ[blocked] = 0
    return dZ

In [24]:
def leakyrelu_derivative(dA, cache):
    """Backward step of the leaky ReLU defined by ``leakyrelu``.

    The local slope is 0.01 where the cached pre-activation is ``<= 0`` and
    1 elsewhere, so the upstream gradient is *scaled* by that slope. The
    previous version overwrote the gradient with the constant 0.01 on the
    negative side, discarding ``dA`` there.

    Returns a new array; ``dA`` is not modified.
    """
    Z = np.asarray(cache)
    upstream = np.asarray(dA)
    return np.where(Z <= 0, 0.01 * upstream, upstream)

In [25]:
def tanh_derivative(dA, cache):
    """Backward step of tanh: ``dA * (1 - tanh(Z) ** 2)``.

    The previous version returned only the local derivative
    ``1 - tanh(Z) ** 2`` and dropped the upstream gradient ``dA``, unlike the
    other ``*_derivative`` helpers in this file.
    """
    Z = cache
    A = np.tanh(Z)
    dZ = dA * (1 - np.power(A, 2))
    return dZ

In [26]:
def single_layer_backward(dA, caches, activation, keep_prob):
    """Backward pass of one dense layer.

    ``caches`` is the value produced by ``single_layer_forward``: the linear
    cache alone for 'linear', otherwise ``(linear_cache, activation_cache)``.
    When ``keep_prob != 1`` the stored dropout mask is re-applied to ``dA``
    before the activation derivative is taken.

    Returns
    -------
    (dA_prev, dW, db)
    """
    is_linear = activation == 'linear'
    if is_linear:
        linear_cache = caches
    else:
        linear_cache, activation_cache = caches

    if keep_prob != 1:
        # Same mask and scaling that the forward pass used.
        D, c, v, b = linear_cache
        dA = dA * D
        dA /= keep_prob
        linear_cache = (D, c, v, b)

    if is_linear:
        dZ = linear_derivative(dA)
    elif activation == 'sigmoid':
        dZ = sigmoid_derivative(dA, activation_cache)
    elif activation == 'softmax':
        # The caller passes the combined softmax/cross-entropy gradient.
        dZ = dA
    elif activation == 'tanh':
        dZ = tanh_derivative(dA, activation_cache)
    elif activation == 'relu':
        dZ = relu_derivative(dA, activation_cache)
    elif activation == 'leakyrelu':
        dZ = leakyrelu_derivative(dA, activation_cache)

    return linear_backward(dZ, linear_cache, activation, keep_prob)

In [27]:
def conv_backward(dA, cache, activation):
    """Backward pass of one convolution layer.

    Parameters
    ----------
    dA : gradient with respect to the layer output.
    cache : ``(A_prev, W, b, hparameters, activation_cache)`` from
        ``conv_forward``.
    activation : 'relu', 'sigmoid', 'tanh' or 'linear'.

    Returns
    -------
    (dA_prev, dW, db) with the shapes of ``A_prev``, ``W`` and
    ``(1, 1, 1, n_C)``.

    Raises
    ------
    ValueError : for an unsupported activation name (previously an
        UnboundLocalError on ``dZ``).
    """
    A_prev, W, b, hparameters, cach = cache
    if activation == 'relu':
        dZ = relu_derivative(dA, cach)
    elif activation == 'sigmoid':
        dZ = sigmoid_derivative(dA, cach)
    elif activation == 'tanh':
        dZ = tanh_derivative(dA, cach)
    elif activation == 'linear':
        dZ = linear_derivative(dA)
    else:
        raise ValueError('Unsupported activation: ' + repr(activation))

    m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape
    f, f, n_C_prev, n_C = W.shape
    _, stride, _, pad = hparameters
    m, n_H, n_W, n_C = dZ.shape

    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))
    A_prev_pad = zero_pad(A_prev, pad)
    dA_prev_pad = zero_pad(dA_prev, pad)

    # NOTE(review): dW/db are summed over the batch here, whereas
    # linear_backward averages by the batch size -- confirm which scaling is
    # intended before changing either.
    for i in range(m):
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        for h in range(n_H):
            vert_start = h * stride
            vert_end = vert_start + f
            for w in range(n_W):
                horiz_start = w * stride
                horiz_end = horiz_start + f
                a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end]
                for c in range(n_C):
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]
        # Strip the padding with explicit end indices: the former
        # ``pad:-pad`` slice is empty when pad == 0.
        dA_prev[i,:,:,:] = da_prev_pad[pad:pad + n_H_prev, pad:pad + n_W_prev, :]
    return dA_prev, dW, db

In [28]:
def create_mask_from_window(X):
    """Boolean mask that is True wherever ``X`` equals its maximum."""
    peak = np.max(X)
    return (X == peak)

In [29]:
def distribute_value(dZ, shape):
    """Spread ``dZ`` evenly over an array of the given 2-D ``shape``."""
    rows, cols = shape
    share = dZ / (rows * cols)
    return np.zeros(shape) + share

In [71]:
def pool_backward(dA, cache, keep_prob):
    """Backward pass of a pooling layer (with its dropout mask).

    ``cache`` is ``(A_prev, hparameters, D)`` from ``pool_forward``. The
    dropout mask ``D`` and the ``1 / keep_prob`` scaling are applied to ``dA``
    first; the result is then routed to the positions of each window maximum
    ('max') or spread evenly over the window ('average').

    Returns
    -------
    dA_prev : gradient with respect to the pooling input.
    """
    A_prev, hparameters, D = cache
    dA = dA * D
    dA = dA / keep_prob
    window, step, mode = hparameters
    _, height_in, width_in, channels_in = A_prev.shape
    batch, height_out, width_out, channels_out = dA.shape

    dA_prev = np.zeros((batch, height_in, width_in, channels_in))
    for i, h, w, c in np.ndindex(batch, height_out, width_out, channels_out):
        top = h * step
        left = w * step
        rows = slice(top, top + window)
        cols = slice(left, left + window)
        if mode == 'max':
            winners = create_mask_from_window(A_prev[i][rows, cols, c])
            dA_prev[i, rows, cols, c] += winners * dA[i, h, w, c]
        elif mode == 'average':
            dA_prev[i, rows, cols, c] += distribute_value(dA[i, h, w, c], (window, window))
    return dA_prev

In [31]:
def flatten_backward(dA, cache):
    """Undo ``flatten_forward`` for a gradient.

    ``flatten_forward`` reshapes to ``(m, features)`` and then transposes, so
    its output (and therefore ``dA``) has examples in columns. The transpose
    must be reversed before restoring the 4-D shape; reshaping the
    column-major-by-example array directly, as before, interleaved values
    from different examples.

    Parameters
    ----------
    dA : gradient of shape ``(n_H * n_W * n_C, m)``.
    cache : ``(m, n_H, n_W, n_C)`` as stored by ``flatten_forward``.
    """
    m, n_H, n_W, n_C = cache
    return dA.T.reshape(m, n_H, n_W, n_C)

In [69]:
def n_layer_backward(X, Y, A, layers, layer_dims, caches, activations, keep_prob):
    """Run the backward pass through every layer.

    Gradients are stored with 0-based layer indices: ``grads['dA' + str(i)]``
    is the gradient with respect to the output of ``layers[i]``, and
    ``grads['dW' + str(i)]`` / ``grads['db' + str(i)]`` belong to ``layers[i]``.
    ``X`` and ``layer_dims`` are not used.

    The dropout setting used for ``layers[i]`` is ``keep_prob[i]``, matching
    ``n_layer_forward``. The previous version read ``keep_prob[i - 1]``, so
    with non-uniform keep probabilities it unpacked caches with the wrong
    layout and rescaled pooled gradients by the wrong factor.

    Returns
    -------
    dict of gradients.
    """
    grads = {}
    n_layers = len(layers)
    output_activation = activations[len(activations) - 1]
    if output_activation == 'linear' or output_activation == 'softmax':
        # Combined derivative of the loss and the output activation.
        dA = A - Y
    else:
        dA = - (np.divide(Y, A + 1e-8) - np.divide(1 - Y, 1 - A + 1e-8))
    grads['dA' + str(n_layers - 1)] = dA

    for i in range(n_layers - 1, -1, -1):
        # Caches and activations are addressed from the end so that the last
        # layer pairs with the last cache.
        count = n_layers - i
        layer_cache = caches[len(caches) - count]
        layer_activation = activations[len(activations) - count]
        upstream = grads['dA' + str(i)]
        if i == n_layers - 1:
            # Output layer: always dense, never uses dropout.
            grads['dA' + str(i - 1)], grads['dW' + str(i)], grads['db' + str(i)] = single_layer_backward(upstream, layer_cache, layer_activation, 1)
        elif layers[i] == 'Dense':
            grads['dA' + str(i - 1)], grads['dW' + str(i)], grads['db' + str(i)] = single_layer_backward(upstream, layer_cache, layer_activation, keep_prob[i])
        elif layers[i] == 'Conv':
            grads['dA' + str(i - 1)], grads['dW' + str(i)], grads['db' + str(i)] = conv_backward(upstream, layer_cache, layer_activation)
        elif layers[i] == 'Pool':
            grads['dA' + str(i - 1)] = pool_backward(upstream, layer_cache, keep_prob[i])
        elif layers[i] == 'Flatten':
            grads['dA' + str(i - 1)] = flatten_backward(upstream, layer_cache)
    return grads

In [33]:
def gradient_descent(parameters, layers, layer_dims, grads, learning_rate, t, beta_1, beta_2, epsilon, Adam, activations):
    """Apply one optimisation step to every trainable layer, in place.

    Layers of type 'Pool' and 'Flatten' are skipped. Parameters of
    ``layers[i]`` live under suffix ``str(i + 1)`` while its gradients live
    under suffix ``str(i)``. With ``Adam == True`` the first and second
    moments are updated, bias-corrected with step count ``t`` and used for the
    update; otherwise plain gradient descent is applied. ``layer_dims`` and
    ``activations`` are not used.

    Returns
    -------
    The same ``parameters`` dictionary that was passed in.
    """
    # (moment-key suffix, parameter name) for the weights and the bias.
    targets = (('w', 'W'), ('b', 'b'))
    for idx, kind in enumerate(layers):
        if kind == 'Pool' or kind == 'Flatten':
            continue
        p = str(idx + 1)
        g = str(idx)
        if Adam == True:
            for sfx, name in targets:
                key = 'Vd' + sfx + p
                parameters[key] = (beta_1 * parameters[key]) + ((1 - beta_1) * grads['d' + name + g])
            for sfx, name in targets:
                key = 'Sd' + sfx + p
                parameters[key] = (beta_2 * parameters[key]) + ((1 - beta_2) * np.square(grads['d' + name + g]))
            for sfx, _ in targets:
                parameters['Vcd' + sfx + p] = (parameters['Vd' + sfx + p]) / (1 - np.power(beta_1, t))
            for sfx, _ in targets:
                parameters['Scd' + sfx + p] = (parameters['Sd' + sfx + p]) / (1 - np.power(beta_2, t))
            for sfx, name in targets:
                step = np.divide(parameters['Vcd' + sfx + p], np.sqrt(parameters['Scd' + sfx + p]) + epsilon)
                parameters[name + p] = parameters[name + p] - learning_rate * (step)
        else:
            for _, name in targets:
                parameters[name + p] = parameters[name + p] - learning_rate * grads['d' + name + g]
    return parameters

In [34]:
def plot(costs, learning_rate):
    """Show the recorded costs as a line chart titled with the learning rate."""
    heading = "Learning rate =" + str(learning_rate)
    plt.plot(np.squeeze(costs))
    plt.title(heading)
    plt.xlabel('Iterations (Per Five)')
    plt.ylabel('Cost')
    plt.show()

In [35]:
def predict(A):
    """Threshold the first row of ``A`` at 0.5, in place.

    Values below 0.5 become 0, all others become 1. The same array object is
    returned.
    """
    A[0, :] = np.where(A[0, :] < 0.5, 0, 1)
    return A

In [36]:
def F1SA(A, Y):
    """F1 score and accuracy of binary predictions.

    Parameters
    ----------
    A : predictions; the first row is compared, entries are expected to be
        0 or 1.
    Y : labels; ``Y.shape[1]`` gives the number of examples.

    Returns
    -------
    (F1Score, accuracy) as floats in ``[0, 1]``. Any ratio whose denominator
    is zero (no predicted positives, no actual positives, or no classified
    examples) is reported as ``0.0`` instead of raising ZeroDivisionError.
    """
    n = Y.shape[1]
    predicted = np.asarray(A[0])[:n]
    actual = np.asarray(Y[0])[:n]
    agree = predicted == actual
    said_yes = predicted == 1
    said_no = predicted == 0

    TP = int(np.sum(agree & said_yes))
    FP = int(np.sum(~agree & said_yes))
    FN = int(np.sum(~agree & said_no))
    TN = int(np.sum(agree & said_no))

    def _ratio(numerator, denominator):
        # Treat an undefined ratio as 0 so degenerate batches stay reportable.
        return numerator / denominator if denominator else 0.0

    prec = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    F1Score = _ratio(2 * prec * recall, prec + recall)
    accuracy = _ratio(TP + TN, TP + FN + TN + FP)
    return F1Score, accuracy

In [37]:
def test_set_prediction(X, Y, layers, layer_dims, parameters, activations):
    """Evaluate the network on a held-out set with dropout disabled.

    * Linear output: the raw network output is returned.
    * Non-linear output with more than one output unit: multi-class accuracy
      is printed.
    * Otherwise: binary F1 score and accuracy are printed.

    Only the linear case returns a value.
    """
    keep_prob = [1] * len(activations)
    A, _ = n_layer_forward(X, layers, layer_dims, parameters, activations, keep_prob)

    output_activation = activations[len(activations) - 1]
    if output_activation == 'linear':
        return A

    if layer_dims[len(layer_dims) - 1] > 1:
        A = multi_class(A)
        Y = multi_class(Y)
        Accuracy = predict_multiclass(A, Y)
        print('The accuracy of the model on Test Set is', Accuracy, '%')
    else:
        A = predict(A)
        F1Score, Accuracy = F1SA(A, Y)
        print('F1Score and Accuracy of the model on the Test Set is respectively', F1Score * 100, '% and', Accuracy * 100, '%')

In [38]:
def multi_class(AL):
    """Index of the largest entry in each column, as a ``(1, n)`` float array."""
    labels = np.zeros((1, AL.shape[1]))
    labels[0, :] = np.argmax(AL, axis = 0)
    return labels

In [39]:
def predict_multiclass(A, Y):
    """Percentage of positions where the first rows of ``A`` and ``Y`` agree."""
    total = Y.shape[1]
    hits = sum(1 for i in range(total) if A[0][i] == Y[0][i])
    return (hits / total) * 100

In [40]:
def neural_network(X, Y, epochs, time_interval, orig_dims, layers, layer_dims, activations, learning_rate_i, decay_rate, keep_prob, initialization, Adam, trained, params = None, beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-8):
    """Train the network with full-batch updates and report training metrics.

    Parameters
    ----------
    X, Y : training inputs and targets.
    epochs : number of full-batch iterations.
    time_interval, decay_rate : the learning rate is
        ``learning_rate_i / (1 + decay_rate * floor((epoch + 1) / time_interval))``.
    orig_dims, layers, layer_dims, activations : network description.
    keep_prob : per-layer dropout keep probabilities used while training.
    initialization, Adam : forwarded to parameter initialisation / the
        optimiser.
    trained : when ``True`` continue from ``params`` instead of initialising.
    params : previously trained parameter dictionary (updated in place).
    beta_1, beta_2, epsilon : Adam hyper-parameters.

    Returns
    -------
    The parameter dictionary after training.

    Notes
    -----
    * The final training-set evaluation runs with dropout disabled, as
      ``test_set_prediction`` does; it previously reused the training
      ``keep_prob`` and so reported metrics of a randomly thinned network.
    * ``params`` now defaults to ``None`` rather than a shared mutable dict.
    * ``learning_rate`` is bound before the loop so ``epochs == 0`` no longer
      fails when plotting.
    """
    if trained == True:
        parameters = {} if params is None else params
    else:
        parameters = initialize_parameters(orig_dims, layers, layer_dims, activations, initialization, Adam)

    costs = []
    learning_rate = learning_rate_i
    output_activation = activations[len(activations) - 1]
    for i in range(epochs):
        learning_rate = (1 / (1 + (decay_rate * math.floor((i + 1) / (time_interval))))) * learning_rate_i
        A, caches = n_layer_forward(X, layers, layer_dims, parameters, activations, keep_prob)
        grads = n_layer_backward(X, Y, A, layers, layer_dims, caches, activations, keep_prob)
        parameters = gradient_descent(parameters, layers, layer_dims, grads, learning_rate, i + 1, beta_1, beta_2, epsilon, Adam, activations)
        if i % 5 == 0 or i == epochs - 1:
            # The cost is computed from the activations of this epoch's
            # forward pass, i.e. before the update that was just applied.
            cost = compute_cost(A, Y, output_activation)
            print('Cost after' , i, 'epochs', 'is', cost)
            costs.append(cost)
    plot(costs, learning_rate)

    # Evaluate deterministically: every layer keeps all of its units.
    eval_keep_prob = [1] * len(layers)
    A, caches = n_layer_forward(X, layers, layer_dims, parameters, activations, eval_keep_prob)
    if output_activation != 'linear' and layer_dims[len(layer_dims) - 1] > 1:
        A = multi_class(A)
        Y = multi_class(Y)
        print(A[0], Y[0])
        Accuracy = predict_multiclass(A, Y)
        print('The accuracy of the model on Training Set is', Accuracy, '%')
    elif output_activation != 'linear':
        A = predict(A)
        F1Score, Accuracy = F1SA(A, Y)
        print('F1Score and Accuracy of the model on the Training Set is respectively', F1Score * 100, '% and', Accuracy * 100, '%')
    return parameters