In [109]:
import numpy as np
import random
import math
from tqdm import tqdm
import os
import pickle

In [110]:
# My activation function of choice. Sigmoid returns a value between 1 and 0.
def sigmoid(x) -> float:
    return 1 / (1 + np.exp(-x))

# Applies the sigmoid function to every value in the vector.
def vector_sigmoid(vector) -> np.ndarray:
    sig_vector = np.vectorize(sigmoid)(vector)
    return sig_vector

# Derivative of the sigmoid function.
def sigmoid_derivative(x) -> float:
    return sigmoid(x)*(1-sigmoid(x))

In [111]:
# Calculates the values of a single layer.
def calculate_layer(layer : int) -> None:
    global weights, biases, activations, weighted_inputs
    # z(L) = b(L) + W(L) * a(L-1)
    #print("biases[layer]: ", biases[layer])
    weighted_input = biases[layer] + (weights[layer] @ activations[layer-1])
    
    weighted_inputs[layer] = weighted_input
    activated = vector_sigmoid(weighted_input)
    activations[layer] = activated


# Loops through every layer and calculates their activation values.
def calculate_output() -> None:
    global layers, activations
    for i in range(1,len(layers)):
        calculate_layer(i)


In [112]:
# Calculates the derivative of the chain rule.
def calculate_chain_derivative(is_parent, layer, row, column) -> float:
    global weights, weighted_inputs, activations, expected_output
    sum = 0
    if layer == len(activations) - 1:
        cost_derivative = 2 * (activations[layer][row] - expected_output[row])
        
        sum = cost_derivative
    else:
        for i in range(len(activations[layer+1])):
            sum += calculate_chain_derivative(False, layer + 1, i, row)
    weighted_input = weighted_inputs[layer][row]
    activation_derivative = sigmoid_derivative(weighted_input)
    if is_parent:
        return sum * activation_derivative
    else:
        weighted_input_derivative = weights[layer][row][column]
        return sum * activation_derivative * weighted_input_derivative

In [113]:
# Calculates the derivative of the bias.
def calculate_bias_derivative(is_parent, layer, row, column) -> float: # Literally just a useless function.
    chain_derivative = calculate_chain_derivative(is_parent, layer, row, column)
    return chain_derivative

# Calculates the derivative of the weight.
def calculate_weight_derivative(is_parent, layer, row, column) -> float:  
    chain_derivative = calculate_chain_derivative(is_parent, layer, row, column)
    result = chain_derivative * activations[layer-1][column]
    return result

In [114]:
def backpropagate() -> None:
    
    global weights, biases, activations, weighted_inputs, expected_output, weight_gradient, bias_gradient

    for layer in range(1, len(weights)):
        for row in range(len(weights[layer])):
            for column in range(len(weights[layer][row])):
                weight_gradient[layer][row][column] = calculate_weight_derivative(True, layer, row, column)
    
    for layer in range(1, len(biases)):
        for row in range(len(biases[layer])):
            bias_gradient[layer][row] = calculate_bias_derivative(True, layer, row, column)

In [115]:
def set_input(input : np.ndarray) -> None:
    global activations
    activations[0] = input

def set_expected_output(output : np.ndarray) -> None:
    global expected_output
    expected_output = output

In [116]:
def next_sample():
    global index, data
    #print("Index: ", index)
    sample = data[index]
    set_input(sample[0])
    set_expected_output(sample[1])
    index += 1

In [117]:
def learn() -> None:
    next_sample()
    calculate_output()
    backpropagate()

In [118]:
def mini_batch_gradient_descent(epochs: int, batch_size: int):
    global weight_gradient, bias_gradient, index, weights, biases
    index = 0
    for epoch in range(epochs):
        batch_weights = [np.zeros_like(layer) for layer in weight_gradient]
        batch_weights = np.array(batch_weights, dtype=object)
        batch_biases = [np.zeros_like(layer) for layer in bias_gradient]
        batch_biases = np.array(batch_biases, dtype=object)

        missed = 0
        for i in tqdm(range(batch_size), desc="Batch", leave=False):
            if i + index >= len(data):
                missed += 1
                break
            learn()
            #print("batch_weights: ", batch_weights)
            #print("weight_gradient: ", weight_gradient)
            batch_weights += weight_gradient
            batch_biases += bias_gradient
        
        batch_weights = np.array([x / (batch_size - missed) for x in batch_weights],dtype=object)
        #weights -= batch_weights
        for w, b in zip(weights, batch_weights):
            w -= b
        
        
        batch_biases = np.array([x / (batch_size - missed) for x in batch_biases],dtype=object)
        #biases -= batch_biases
        for b, b2 in zip(biases, batch_biases):
            b -= b2


In [119]:
# Check if the formatted data file exists
if os.path.exists("formatted_data.pkl"):
    print("Formatted data file already exists. Loading data...")
    with tqdm(total=1, desc="Loading data") as pbar:
        with open("formatted_data.pkl", "rb") as file:
            data = pickle.load(file)
        pbar.update(1)
else:
    print("Formatted data file does not exist. Formatting data...")
    # Your existing code for formatting the data
    
    with open("mnist_train.csv", "r") as file:
        lines = file.readlines()
        data = []
        total_lines = len(lines)
        for line in tqdm(lines, total=total_lines, desc="Processing data"):
            values = line.strip().split(",")
            label = int(values[0])
            image = np.array([float(x)/255 for x in values[1:]], dtype=np.float32)
            if len(image) != 784: print("Error! Image wrong size: " + len(image))
            expected_output = np.zeros(10)
            expected_output[label] = 1
            example = [image, expected_output]
            data.append(example)

        # Save the training data into a file
        with open("formatted_data.pkl", "wb") as file:
            pickle.dump(data, file)
        print("Training data saved into formatted data file.")


#print(data[0])


Formatted data file already exists. Loading data...


Loading data: 100%|██████████| 1/1 [00:00<00:00,  2.13it/s]




In [120]:
# Turn off dumb warnings
#np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
layers = [784, 16, 16, 10]

weights = [0] + [np.random.uniform(-1,1,size=(layers[i], layers[i-1])) for i in range(1, len(layers))]
weigths = np.array(weights,dtype=object)

biases = [0] + [np.random.uniform(-1, 1, size=layers[i]) for i in range(1, len(layers))]
biases = np.array(biases, dtype=object)

weighted_inputs = [0] + [np.zeros((layers[i], 1)) for i in range(1, len(layers))]
weighted_inputs = np.array(weighted_inputs, dtype=object)

activations = [np.zeros((layers[i], 1)) for i in range(0, len(layers))]
weight_gradient = [np.zeros_like(layer) for layer in weights]
weight_gradient = np.array(weight_gradient, dtype=object)
bias_gradient = [np.zeros_like(layer) for layer in biases]
bias_gradient = np.array(bias_gradient, dtype=object)

mini_batch_gradient_descent(200, 1)

                                                    

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (3,) + inhomogeneous part.