In [172]:
## Package imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn import datasets


## Function declaration of shallow neural network
# Hyperparameters include the learning rate and the dimension of the hidden layer
def shallow(X, Y, layers_dim, learning_rate=0.001, max_iter=2000):
    # Initialization
    parameters = initialize(layers_dim)
    
    # Iterate a number of max_iter times
    for i in range(max_iter):
        # Forward propagation
        cache = forward_prop(X, parameters)
        # Backward propagation
        gradients = backward_prop(X, Y, parameters, cache)
        # Update parameters
        parameters['W1'] = parameters['W1'] - learning_rate * gradients['dW1']
        parameters['b1'] = parameters['b1'] - learning_rate * gradients['db1']
        parameters['W2'] = parameters['W2'] - learning_rate * gradients['dW2']
        parameters['b2'] = parameters['b2'] - learning_rate * gradients['db2']

    return parameters
    

# Function to initialize the four parameters
def initialize(layers_dim):
    #print(layers_dim)
    W1 = np.random.randn(layers_dim[1], layers_dim[0]) * 0.01
    b1 = np.zeros((layers_dim[1], 1))
    W2 = np.random.randn(layers_dim[2], layers_dim[1]) * 0.01
    b2 = np.zeros((layers_dim[2], 1))
    parameters = {'W1': W1, 
                  'b1': b1, 
                  'W2': W2, 
                  'b2': b2}
    return parameters


# Function to conduct forward propagation
# Input: X and parameters
# Output: The output of different layers
def forward_prop(X, parameters):
    # Extract parameters
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    # Calculate output for layers
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = 1 / (1 + np.exp(-Z2))
    cache = {'Z1': Z1, 
             'A1': A1,
             'Z2': Z2, 
             'A2': A2}
    return cache
    
    
# Function to conduct backward propagation
# Input: X, Y, and cache
# Output: The gradients for the parameters
def backward_prop(X, Y, parameters, cache):
    # Extract info
    W1 = parameters['W1']
    W2 = parameters['W2']
    A1 = cache['A1']
    A2 = cache['A2']
    m = Y.shape[0]
    
    # Calculate gradients
    dZ2 = A2 - Y
    dW2 = 1/m * np.dot(dZ2, A1.T)
    db2 = 1/m * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = np.multiply(np.dot(W2.T, dZ2), (1 - np.power(A1, 2)))
    dW1 = 1/m * np.dot(dZ1, X.T)
    db1 = 1/m * np.sum(dZ1, axis=1, keepdims=True)
    gradients = {'dW1': dW1, 
                 'db1': db1, 
                 'dW2': dW2, 
                 'db2': db2}
    return gradients
    

In [175]:
## Read data
data_file = 'data/binary_classification.csv'
raw_data = pd.read_csv(data_file)

# Extract X and scale
X = raw_data.iloc[:, 1:raw_data.shape[1]]
X_max = X.apply(np.max, axis=0)
X_min = X.apply(np.min, axis=0)
X = (X - X_min) / (X_max - X_min)
np_X = X.to_numpy().T
np_X = np_X.astype('float64')

# map Y to 0/1 binary values
Y = raw_data.iloc[:, 0]
Y = Y.map({'M': 0, 'B': 1})
np_Y = Y.to_numpy()
np_Y = np_Y.reshape(1, np_Y.shape[0])
np_Y = np_Y.astype('float64')

# Run shallow neural network
parameters = shallow(np_X, np_Y, [np_X.shape[0], 10, 1])

# Test using the current set
W1 = parameters['W1']
b1 = parameters['b1']
W2 = parameters['W2']
b2 = parameters['b2']

Z1 = np.dot(W1, np_X) + b1
A1 = np.tanh(Z1)
Z2 = np.dot(W2, A1) + b2
A2 = 1 / (1 + np.exp(-Z2))
y_pred = A2.copy()
y_pred[y_pred > 0.5] = 1
y_pred[y_pred <= 0.5] = 0
y_pred

count = 0
for i in range(y_pred.shape[1]):
    if y_pred[0][i] == np_Y[0][i]:
        count += 1
print(count/np_Y.shape[1])

0.9876977152899824
