In [11]:
## Package & data imports
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import sklearn
from sklearn import datasets

## Read data
#data_file = 'data/binary_classification.csv'
#raw_data = pd.read_csv(data_file)


In [128]:
## Building blocks of a DNN


# Function to construct a deep neural network
# Input the training data X and Y, return the trained parameters which represent the model trained
def deep_NN(X, Y, layer_dims, max_iter=500, learning_rate=0.01):
    
    # Insert the input (layer 0) into the layer_dims
    layer_dims.insert(0, X.shape[0])
    
    parameters = initialize(layer_dims)
    #print(parameters)
    for i in range(max_iter):
        
        cache, cost = forward_prop(X, Y, parameters, activation='tanh')
        gradients = back_prop(Y, cache, parameters, activation='tanh')
        
        # Update parameters
        l = int(len(parameters)/2)

        for j in range(1, l, 1):
            parameters['W'+str(j)] = parameters['W'+str(j)] - learning_rate * gradients['dW'+str(j)]
            parameters['b'+str(j)] = parameters['b'+str(j)] - learning_rate * gradients['db'+str(j)]
    
        # Produce an output every 100 iterations
        if i % 1 == 0:
            print('Current iteration is', i, 'and the current cost is', cost)
            
            
# Function to initialize the parameter for a DNN
# Input the number of layers for each layers and output a set of initialized parameters
def initialize(layer_dims):
    parameters = {}
    for i in range(1, len(layer_dims), 1):
        parameters['W'+str(i)] = np.random.randn(layer_dims[i], layer_dims[i-1]) * 0.01
        parameters['b'+str(i)] = np.zeros((layer_dims[i], 1))
    
    return parameters


# Function to conduct forward propagation in a DNN
# May customize the activation function choosing between tanh and relu activation (except the last layer)
# Input is the X and parameters
# Output A and Z (as cache) for backward propagation
def forward_prop(X, Y, parameters, activation='tanh'):
    l = int(len(parameters)/2)
    m = X.shape[0]
    cache = {'A0': X}
    
    cache['Z1'] = np.matmul(parameters['W1'], X) + parameters['b1']
    if activation == 'tanh':
        cache['A1'] = tanh_activation(cache['Z1'])
    elif activation == 'relu':
        cache['A1'] = relu_activation(cache['Z1'])
    else:
        print('Incorrect activation value, process should abort.')
        
    for i in range(2, l, 1):
        cache['Z'+str(i)] = np.matmul(parameters['W'+str(i)], cache['A'+str(i-1)]) + parameters['b'+str(i)]
        if activation == 'tanh':
            cache['A'+str(i)] = tanh_activation(cache['Z'+str(i)])
        elif activation == 'relu':
            cache['A'+str(i)] = relu_activation(cache['Z'+str(i)])
    
    # Last layer
    cache['Z'+str(l)] = np.matmul(parameters['W'+str(l)], cache['A'+str(l-1)]) + parameters['b'+str(l)]
    cache['A'+str(l)] = sigmoid_activation(cache['Z'+str(l)])
    
    #print(cache['A'+str(l)])
    cost = - 1/m * np.sum(np.multiply(Y, np.log(cache['A'+str(l)])) + np.multiply((1 - Y), np.log(1 - cache['A'+str(l)])))
    
    return cache, cost


# Function to conduct back propagation in a DNN
# May customize the activation function choosing between tanh and relu activation (except the last layer)
# Inputs are Y, cache (Z and A value), parameter, and activation choices
# Output derivative
def back_prop(Y, cache, parameters, activation='tanh'):
    l = int(len(parameters)/2)
    m = X.shape[0]
    gradients = {}
    
    # Backpropagate last layer assuming sigmoid activation function in the last layer
    dZ = cache['A'+str(l)] - Y # Gradient of sigmoid activation
    dW = 1/m * np.matmul(dZ, cache['A'+str(l-1)].T)
    db = 1/m * np.sum(dZ, axis=1, keepdims=True)
    gradients['dW'+str(l)] = dW
    gradients['db'+str(l)] = db
    
    dA = np.matmul(parameters['W'+str(l)].T, dZ)
    for i in range(l-1, 0, -1):
        # dZ at current layer
        if activation == 'tanh':
            dZ = np.multiply(dA, tanh_derivative(cache['Z'+str(i)]))
        elif activation == 'relu':
            dZ = np.multiply(dA, relu_derivative(cache['Z'+str(i)]))
        else:
            print('Incorrect activation value, process should abort.')
        
        dW = 1/m * np.matmul(dZ, cache['A'+str(i-1)].T)
        db = 1/m * np.sum(dZ, axis=1, keepdims=True)
        gradients['dW'+str(i)] = dW
        gradients['db'+str(i)] = db        
        
        dA = np.matmul(parameters['W'+str(i)].T, dZ)
    
    return gradients
        
    


    
# Implementing tanh function
def tanh_activation(Z):
    A = (np.exp(Z) - np.exp(-Z)) / (np.exp(Z) + np.exp(-Z))
    return A


# Derivative of tanh function
def tanh_derivative(Z):
    derivative = 1 / np.power(np.cosh(Z), 2)
    return derivative


# Implementing relu function
def relu_activation(Z):
    A = np.maximum(Z, np.zeros((Z.shape[0], Z.shape[1])))
    return A


# Derivative of relu function
def relu_derivative(Z):
    derivative = np.maximum(Z, np.zeros((Z.shape[0], Z.shape[1])))
    derivative[derivative > 0] = 1
    return derivative


# Implementing sigmoid function
def sigmoid_activation(Z):
    A = 1 / (1 + np.exp(-Z))
    return A

# Test my implementation
* Dataset used is a simple binary classification dataset

In [129]:
## Read data
data_file = 'data/binary_classification.csv'
raw_data = pd.read_csv(data_file)

# Extract X and scale
X = raw_data.iloc[:, 1:raw_data.shape[1]]
X_max = X.apply(np.max, axis=0)
X_min = X.apply(np.min, axis=0)
X = (X - X_min) / (X_max - X_min)
np_X = X.to_numpy().T
np_X = np_X.astype('float64')

# map Y to 0/1 binary values
Y = raw_data.iloc[:, 0]
Y = Y.map({'M': 0, 'B': 1})
np_Y = Y.to_numpy()
np_Y = np_Y.reshape(1, np_Y.shape[0])
np_Y = np_Y.astype('float64')

# Run deep neural network
parameters = deep_NN(np_X, np_Y, [5, 1], max_iter=20, learning_rate=0.01)


Current iteration is 0 and the current cost is 13.14706679078232
Current iteration is 1 and the current cost is 13.14706463189791
Current iteration is 2 and the current cost is 13.147062473015618
Current iteration is 3 and the current cost is 13.147060314135445
Current iteration is 4 and the current cost is 13.147058155257394
Current iteration is 5 and the current cost is 13.14705599638147
Current iteration is 6 and the current cost is 13.147053837507675
Current iteration is 7 and the current cost is 13.147051678636009
Current iteration is 8 and the current cost is 13.147049519766478
Current iteration is 9 and the current cost is 13.147047360899082
Current iteration is 10 and the current cost is 13.147045202033825
Current iteration is 11 and the current cost is 13.147043043170711
Current iteration is 12 and the current cost is 13.147040884309742
Current iteration is 13 and the current cost is 13.14703872545092
Current iteration is 14 and the current cost is 13.147036566594245
Current i

In [53]:
np_X.shape

(569, 30)