In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [2]:
import numpy as np

def load_images(file_path, label_path, image_height, image_width):
    with open(file_path, 'r') as file:
        lines = [line.rstrip('\n') for line in file]  # Only remove the newline character at the end

    # Read the label data
    with open(label_path, 'r') as file:
        labels = [int(label.strip()) for label in file]

    # Parse the images
    num_images = len(lines) // image_height
    # Allocate memory for the images in a flat format
    images = np.zeros((num_images, image_height * image_width), dtype=int)
    
    for i in range(num_images):
        base_index = i * image_height
        for j in range(image_height):
            line = lines[base_index + j]
            # Right-pad the line with spaces if it is shorter than expected
            line = line.ljust(image_width, ' ')
            flat_index = j * image_width
            # Store flat pixel values directly
            images[i, flat_index:flat_index + image_width] = [common_mapping(char) for char in line]
    
    return images, np.array(labels)

def common_mapping(char):
    return 1 if char in ('#', '+') else 0

def preprocess_data(features, labels):
    return {'features': features.astype(np.float32), 'labels': labels}


In [3]:

# * Load and Process Digit Data
digits_data_images, digits_data_labels = load_images('./data/digitdata/trainingimages', './data/digitdata/traininglabels', 28, 28)
digits_test_images, digits_test_labels = load_images('./data/digitdata/testimages', './data/digitdata/testlabels', 28, 28)

# * Load and Process Face Data
faces_data_images, faces_data_labels = load_images('./data/facedata/facedatatrain', './data/facedata/facedatatrainlabels', 70, 70)
faces_test_images, faces_test_labels = load_images('./data/facedata/facedatatest', './data/facedata/facedatatestlabels', 70, 70)


In [4]:
m,n = digits_data_images.shape
X_dev = digits_data_images[0:1000]
Y_dev = digits_data_labels[0:1000]

X_train = digits_data_images[1000:m]
Y_train = digits_data_labels[1000:m]

In [5]:
def init_params():
    W1 = np.random.randn(10,784)
    b1 = np.random.randn(10,1)
    W2 = np.random.randn(10,10)
    b2 = np.random.randn(10,1)
    return W1,b1,W2,b2

def ReLU(x):
    return np.maximum(0,x)

def softmax(x):
    e_x = np.exp(x - np.max(x, axis=0))
    return e_x / np.sum(e_x, axis=0)

def forward_prop(w1, b1, w2, b2, X):
    Z1 = np.dot(w1,X) + b1
    A1 = ReLU(Z1)
    Z2 = np.dot(w2,A1) + b2
    A2 = softmax(Z2)
    return Z1,A1,Z2,A2

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max()+1))
    one_hot_Y[np.arange(Y.size),Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

def derivative_ReLU(x):
    return 1. * (x > 0)

def back_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    m = Y.size
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = 1/m * np.dot(dZ2, A1.T)
    db2 = 1/m * np.sum(dZ2, axis=1, keepdims=True)  # corrected sum over axis
    dZ1 = np.dot(W2.T, dZ2) * derivative_ReLU(Z1)
    dW1 = 1/m * np.dot(dZ1, X.T)
    db1 = 1/m * np.sum(dZ1, axis=1, keepdims=True)  # ensure correct axis and keepdims

    return dW1, db1, dW2, db2

    

    
def update_params(W1,b1,W2,b2,dW1,db1,dW2,db2,learning_rate):
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    return W1,b1,W2,b2

In [8]:
def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    print(predictions,Y)
    return np.sum(predictions == Y) / Y.size

def gradient_descent(X,Y,iterations, alpha):
    W1,b1,W2,b2 = init_params()
    X = X.T
    for i in range(iterations):
        Z1,A1,Z2,A2 = forward_prop(W1,b1,W2,b2,X)
        dW1,db1,dW2,db2 = back_prop(Z1,A1,Z2,A2,W1,W2,X,Y)
        W1,b1,W2,b2 = update_params(W1,b1,W2,b2,dW1,db1,dW2,db2,alpha)
        if i % 50 == 0:
            print("Iteration: ",i)
            predictions = get_predictions(A2)
            print("Accuracy: ", get_accuracy(predictions,Y))
    return W1,b1,W2,b2
    
W1,b1,W2,b2 = gradient_descent(X_train,Y_train,1000,0.1)

Iteration:  0
[8 8 8 ... 7 6 8] [0 7 1 ... 2 1 2]
Accuracy:  0.099
Iteration:  50
[9 9 1 ... 9 1 9] [0 7 1 ... 2 1 2]
Accuracy:  0.27675
Iteration:  100
[9 9 1 ... 9 1 9] [0 7 1 ... 2 1 2]
Accuracy:  0.313
Iteration:  150
[9 3 1 ... 9 1 9] [0 7 1 ... 2 1 2]
Accuracy:  0.354
Iteration:  200
[4 3 1 ... 0 1 6] [0 7 1 ... 2 1 2]
Accuracy:  0.38475
Iteration:  250
[4 9 1 ... 0 1 6] [0 7 1 ... 2 1 2]
Accuracy:  0.44575
Iteration:  300
[4 9 1 ... 0 1 6] [0 7 1 ... 2 1 2]
Accuracy:  0.47325
Iteration:  350
[4 9 1 ... 0 1 6] [0 7 1 ... 2 1 2]
Accuracy:  0.484
Iteration:  400
[4 7 1 ... 0 1 6] [0 7 1 ... 2 1 2]
Accuracy:  0.50275
Iteration:  450
[4 7 1 ... 0 1 6] [0 7 1 ... 2 1 2]
Accuracy:  0.51575
Iteration:  500
[4 7 1 ... 0 1 6] [0 7 1 ... 2 1 2]
Accuracy:  0.53075
Iteration:  550
[4 7 1 ... 0 1 6] [0 7 1 ... 2 1 2]
Accuracy:  0.54425
Iteration:  600
[4 7 1 ... 0 1 6] [0 7 1 ... 2 1 2]
Accuracy:  0.5545
Iteration:  650
[4 7 1 ... 0 1 6] [0 7 1 ... 2 1 2]
Accuracy:  0.563
Iteration:  700
[0 7