In [1]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split

In [2]:
# Load the dataset: RGB values of image pixels plus a Light-or-Dark font classification
DATA_URL = 'https://tinyurl.com/y2qmhfsr'
all_data = pd.read_csv(DATA_URL)

In [21]:
# Learning rate: the factor each slope is multiplied by before it is
# subtracted from a weight or bias during gradient descent
L = 1e-2

In [3]:
# Extract the input columns (the first three) and scale them down by 255;
# the last column is taken, unscaled, as the expected output
feature_columns = all_data.iloc[:, 0:3]
all_inputs = feature_columns.values / 255.0

label_column = all_data.iloc[:, -1]
all_outputs = label_column.values

In [4]:
# Hold out a quarter of the records as the test set; the fixed random_state
# makes the split itself repeatable from run to run
X_train, X_test, Y_train, Y_test = train_test_split(
    all_inputs,
    all_outputs,
    test_size=0.25,
    random_state=1,
)

# Number of training records
n = len(X_train)

In [5]:
# Neural network with weights and biases, randomly initialized.
# Seed NumPy's global generator first so the initial values -- and any later
# draws from np.random's global generator -- are repeatable on a fresh
# Restart & Run All instead of changing on every execution.
SEED = 1
np.random.seed(SEED)

# np.random.rand draws uniformly from [0, 1).
# Hidden layer: 3 inputs -> 3 units; output layer: 3 hidden units -> 1 unit.
w_hidden = np.random.rand(3, 3)
w_output = np.random.rand(1, 3)

# One bias per unit, stored as column vectors
b_hidden = np.random.rand(3, 1)
b_output = np.random.rand(1, 1)

In [6]:
# Activation functions
def relu(x):
    """Rectified linear unit: the element-wise maximum of ``x`` and 0."""
    return np.maximum(x, 0)


def logistic(x):
    """Logistic (sigmoid) function ``1 / (1 + e^(-x))``, applied element-wise."""
    return 1 / (1 + np.exp(-x))

In [7]:
# Forward pass: push inputs through both layers and hand back every intermediate value
def forward_prop(x):
    """Run ``x`` through the hidden layer and then the output layer.

    Reads the module-level ``w_hidden``, ``b_hidden``, ``w_output`` and
    ``b_output`` and applies ``relu`` to the hidden layer and ``logistic``
    to the output layer.

    Returns:
        The tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden
        activation, output pre-activation, and output activation.
    """
    hidden_pre = w_hidden @ x + b_hidden
    hidden_act = relu(hidden_pre)
    output_pre = w_output @ hidden_act + b_output
    return hidden_pre, hidden_act, output_pre, logistic(output_pre)

In [14]:
# Calculate accuracy on the held-out test records
# forward_prop returns (Z1, A1, Z2, A2); element 3 is A2, the output layer
test_predictions = forward_prop(X_test.T)[3]
predicted_labels = (test_predictions >= .5).astype(int).ravel()  # threshold at 0.5 -> 0/1 labels
test_comparisons = np.equal(predicted_labels, Y_test)
accuracy = test_comparisons.mean()  # fraction of test records predicted correctly
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.59


In [15]:
# Derivatives of Activation functions
def d_relu(x):
    """Derivative of ReLU: 1 where ``x > 0`` and 0 elsewhere (element-wise)."""
    return np.where(x > 0, 1, 0)


def d_logistic(x):
    """Derivative of the logistic function, ``e^(-x) / (1 + e^(-x))^2``.

    The derivative is an even function of ``x``, so it is evaluated at
    ``-|x|``: the exponent is then never positive and ``np.exp`` cannot
    overflow.  Evaluating the formula directly gives ``inf / inf = nan``
    for large-magnitude negative inputs.
    """
    decay = np.exp(-np.abs(x))
    return decay / (1 + decay) ** 2

In [22]:
# Return slopes for weights and biases using chain rule
def backward_prop(Z1, A1, Z2, A2, X, Y):
    """Compute the gradient of the squared-error cost w.r.t. every parameter.

    The cost is ``(A2 - Y)^2`` summed over the sample columns.  Per-unit
    chain-rule factors are combined element-wise; matrix products are used
    only where a sum over samples (or over the output unit) is actually
    required.  Chaining everything with ``@`` would sum the hidden-layer
    terms across hidden units and give every hidden unit the same update.

    Args:
        Z1: hidden-layer pre-activations, one column per sample.
        A1: hidden-layer activations, same shape as ``Z1``.
        Z2: output-layer pre-activations, one column per sample.
        A2: output-layer activations, same shape as ``Z2``.
        X: network inputs, one column per sample.
        Y: expected outputs, one value per sample.

    Returns:
        ``(dC_dW1, dC_dB1, dC_dW2, dC_dB2)``, each with the same shape as
        the parameter it belongs to (``w_hidden``, ``b_hidden``,
        ``w_output``, ``b_output``).

    Reads the module-level ``w_output``, ``d_logistic`` and ``d_relu``.
    """
    # Lay the targets out as a row so they line up with A2's sample columns
    Y = np.asarray(Y).reshape(1, -1)

    # Local derivatives of each stage
    dC_dA2 = 2 * A2 - 2 * Y          # derivative of (A2 - Y)^2
    dA2_dZ2 = d_logistic(Z2)
    dA1_dZ1 = d_relu(Z1)

    # Output layer: error signal at Z2, then slopes for w_output / b_output
    dC_dZ2 = dC_dA2 * dA2_dZ2
    dC_dW2 = dC_dZ2 @ A1.T                          # dZ2/dW2 = A1
    dC_dB2 = dC_dZ2.sum(axis=1, keepdims=True)      # dZ2/dB2 = 1

    # Hidden layer: send the error back through w_output, one row per hidden unit
    dC_dA1 = w_output.T @ dC_dZ2                    # dZ2/dA1 = w_output
    dC_dZ1 = dC_dA1 * dA1_dZ1
    dC_dW1 = dC_dZ1 @ X.T                           # dZ1/dW1 = X
    dC_dB1 = dC_dZ1.sum(axis=1, keepdims=True)      # dZ1/dB1 = 1

    return dC_dW1, dC_dB1, dC_dW2, dC_dB2

In [23]:
# Execute gradient descent, using one randomly drawn training record per step
for i in range(100_000):
    # Randomly select one of the training records
    idx = np.random.choice(n, 1, replace=False)
    X_sample, Y_sample = X_train[idx].T, Y_train[idx]

    # Run the randomly selected training record through the network
    Z1, A1, Z2, A2 = forward_prop(X_sample)

    # Distribute the error through backpropagation
    dW1, dB1, dW2, dB2 = backward_prop(Z1, A1, Z2, A2, X_sample, Y_sample)

    # Update weights and biases: step each one against its slope, scaled by L.
    # `-=` modifies the ndarray in place, so the module-level arrays change.
    for param, slope in (
        (w_hidden, dW1),
        (b_hidden, dB1),
        (w_output, dW2),
        (b_output, dB2),
    ):
        param -= L * slope
    

In [24]:
# Re-calculate accuracy on the held-out test records
# forward_prop returns (Z1, A1, Z2, A2); element 3 is A2, the output layer
test_predictions = forward_prop(X_test.T)[3]
predicted_labels = (test_predictions >= .5).astype(int).ravel()  # threshold at 0.5 -> 0/1 labels
test_comparisons = np.equal(predicted_labels, Y_test)
accuracy = test_comparisons.mean()  # fraction of test records predicted correctly
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.99
