In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

Project from Thomas Nield's Essential Math for Data Science

In [16]:
all_data = pd.read_csv("https://tinyurl.com/y2qmhfsr")
all_data.head()

Unnamed: 0,RED,GREEN,BLUE,LIGHT_OR_DARK_FONT_IND
0,0,0,0,0
1,0,0,128,0
2,0,0,139,0
3,0,0,205,0
4,0,0,238,0


In [17]:
all_data.sum()

RED                       215077
GREEN                     194316
BLUE                      182750
LIGHT_OR_DARK_FONT_IND       833
dtype: int64

In [22]:
all_data.isnull().sum()

RED                       0
GREEN                     0
BLUE                      0
LIGHT_OR_DARK_FONT_IND    0
dtype: int64

In [23]:
#learning rate is set to .05, not too small or too big

L = 0.05

In [24]:
#split data between inputs and outputs

all_inputs = (all_data.iloc[:, 0:3].values / 255.00)
all_outputs = all_data.iloc[:, -1].values

In [25]:
#split data again for training and testing

X_train, X_test, y_train, y_test = train_test_split(all_inputs, all_outputs, test_size=1 / 3)
n = X_train.shape[0]

In [6]:
n

896

In [26]:
#define weights and biases randomly

w_hidden = np.random.rand(3, 3)
w_output = np.random.rand(1, 3)

b_hidden = np.random.rand(3, 1)
b_output = np.random.rand(1, 1)

In [27]:
#define activation functions

relu = lambda x: np.maximum(x, 0)
logistic = lambda x: 1 / (1 + np.exp(-x))

In [28]:
#devise forward pass, the initial training of the model

def forward_prop(X):
    Z1 = w_hidden @ X + b_hidden
    A1 = relu(Z1)
    Z2 = w_output @ A1 + b_output
    A2 = logistic(Z2)
    return Z1, A1, Z2, A2

In [43]:
Z1

array([[-3.4262595 ],
       [-3.20672437],
       [-3.91469438]])

In [44]:
A1

array([[0.],
       [0.],
       [0.]])

In [45]:
Z2

array([[-5.44904514]])

In [46]:
A2

array([[0.00428199]])

In [30]:
#derivatives of activation functions

d_relu = lambda x: x > 0
d_logistic = lambda x: np.exp(-x) / (1 + np.exp(-x)) ** 2

In [31]:
#define backward propogation using matrix multiplication

def backward_prop(Z1, A1, Z2, A2, X, y):
    dC_dA2 = 2 * A2 - 2 * y
    dA2_dZ2 = d_logistic(Z2)
    dZ2_dA1 = w_output
    dZ2_dW2 = A1
    dZ2_dB2 = 1
    dA1_dZ1 = d_relu(Z1)
    dZ1_dW1 = X
    dZ1_dB1 = 1
    
    dC_dW2 = dC_dA2 @ dA2_dZ2 @ dZ2_dW2.T
    dC_dB2 = dC_dA2 @ dA2_dZ2 * dZ2_dB2
    dC_dA1 = dC_dA2 @ dA2_dZ2 @ dZ2_dA1
    dC_dW1 = dC_dA1 @ dA1_dZ1 @ dZ1_dW1.T
    dC_dB1 = dC_dA1 @ dA1_dZ1 * dZ1_dB1
    return dC_dW1, dC_dB1, dC_dW2, dC_dB2
    

In [32]:
#update the weights and biases as model trains

for i in range(100_000):
    idx = np.random.choice(n, 1, replace=False)
    X_sample = X_train[idx].transpose()
    y_sample = y_train[idx]
    
    Z1, A1, Z2, A2 = forward_prop(X_sample)
    
    dW1, dB1, dW2, dB2 = backward_prop(Z1, A1, Z2, A2, X_sample, y_sample)
    
    w_hidden -= L * dW1
    b_hidden -= L * dB1
    w_output -= L * dW2
    b_output -= L * dB2

In [39]:
w_hidden

array([[4.14871212, 8.12738159, 1.60115839],
       [3.68851641, 8.48445087, 1.08026492],
       [3.58476487, 8.36571598, 1.21087937]])

In [40]:
b_hidden

array([[-6.56050264],
       [-5.80617772],
       [-6.52879066]])

In [41]:
w_output

array([[4.55357226, 2.16478062, 3.78413628]])

In [42]:
b_output

array([[-5.44904697]])

In [33]:
#predict results

test_predictions = forward_prop(X_test.transpose())[3]

test_comparisons = np.equal((test_predictions >= .5).flatten().astype(int), y_test)

accuracy = sum(test_comparisons.astype(int) / X_test.shape[0])

print("ACCURACY: ", accuracy)

ACCURACY:  0.9755011135857389
