In [None]:
import random
import numpy as np
import csv
import math

In [None]:
# Initialize lists to hold the labels and pixel data
labels = []
images = []

# Open and read the CSV file.
# newline='' is how the csv module expects its input file to be opened.
with open('train.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    # Skip the header row (the None default tolerates a completely empty file)
    next(reader, None)

    # Iterate through each row in the CSV file
    for row in reader:
        # csv.reader yields an empty list for a blank line; there is nothing to parse in it
        if not row:
            continue
        # The first column is the label
        labels.append(int(row[0]))
        # The rest of the columns are the pixels
        images.append([int(pixel) for pixel in row[1:]])

# Output the lengths of the lists to confirm they have been populated correctly
print(f'Number of labels: {len(labels)}')
print(f'Number of images: {len(images)}')

# Print a compact sanity check: the first labels and how many pixels each of the first
# images holds (printing the raw pixel rows would flood the cell output)
print('First 5 labels:', labels[:5])
print('Pixel counts of first 5 images:', [len(image) for image in images[:5]])

In [None]:
# Make a list of tuples of dataset: (actual digit value, [list of pixels in image]).
# Each pixel is divided by 255.0 before being stored.
combined_data = [
    (label, [pixel / 255.0 for pixel in image])
    for label, image in zip(labels, images)
]

# Show a short summary of the first sample rather than dumping every pixel value
if combined_data:
    first_label, first_pixels = combined_data[0]
    print(f'First sample: label={first_label}, pixels={len(first_pixels)}, first 5 pixels={first_pixels[:5]}')
print(f'Number of samples: {len(combined_data)}')

In [None]:
# Seed for the `random` module so the shuffle below gives the same split on every fresh run
SEED = 42
# Number of samples used for training; every remaining sample is held out for testing
TRAIN_SIZE = 32000

random.seed(SEED)

# Shuffle a copy: combined_data keeps its original order, so re-running this cell
# always produces the same split instead of re-shuffling an already shuffled list
shuffled_data = random.sample(combined_data, k=len(combined_data))
train_data = shuffled_data[:TRAIN_SIZE]
test_data = shuffled_data[TRAIN_SIZE:]

In [None]:
class BasicNetwork(object):
    """Fully connected feed-forward network with sigmoid activations.

    The network is trained with mini-batch gradient descent on a squared-error cost.
    It relies on the module-level helpers sigmoid, sigmoidPrime, costFunction and
    costFunctionPrime.
    """

    def __init__(self, layers):
        """Create the network.

        layers: list of ints, one per layer, giving the number of neurons in that layer
                (first entry is the input layer, last entry is the output layer)
        """
        # Number of layers is the size of the input list
        self.num_layers = len(layers)

        # Each element in the list corresponds to number of neurons in that layer
        self.layers = layers

        # One (n x 1) column of biases per non-input layer, all starting out at 0.0
        self.biases = [np.zeros((size, 1)) for size in layers[1:]]

        # Randomly generate a (size(layer + 1) x size(layer)) matrix of weights per pair of adjacent
        # layers, drawn from a zero-mean Gaussian with standard deviation sqrt(2 / (fan_in + fan_out))
        # (Xavier initialization). Note, self.weights is a plain Python list of numpy arrays, not a
        # single numpy array.
        self.weights = []
        for fan_in, fan_out in zip(layers[:-1], layers[1:]):
            std = math.sqrt(2 / (fan_in + fan_out))
            rows = [[random.gauss(0.0, std) for _ in range(fan_in)] for _ in range(fan_out)]
            self.weights.append(np.array(rows))

        # Filled in by calculateOutputLayer on every forward pass
        self.activations = []
        self.sig_activations = []

    def calculateOutputLayer(self, inputLayer):
        """Run a forward pass and return the output layer as an (n x 1) column.

        inputLayer: a list of values indicating the initial values of input layer

        Side effect: stores the weighted sums of every non-input layer in self.activations
        and the input column followed by every sigmoid output in self.sig_activations.
        """
        # currLayer keeps track of the values of whatever layer we are on in the loop
        currLayer = np.array(inputLayer, dtype=float).reshape(-1, 1)
        activations = []
        sig_activations = [currLayer]

        for weight, bias in zip(self.weights, self.biases):
            # Weighted sum of the previous layer, then the sigmoid of that weighted sum
            nextLayer = np.dot(weight, currLayer) + bias
            currLayer = sigmoid(nextLayer)

            activations.append(nextLayer)
            sig_activations.append(currLayer)

        self.activations = activations
        self.sig_activations = sig_activations
        return currLayer

    def startTrain(self, train_data, batch_size, iterations, learning_rate=0.1):
        """Train the network with mini-batch gradient descent.

        train_data: list of (actual value, [list of pixels]) tuples to train on
        batch_size: the size of each batch within the overall input data (must be >= 1)
        iterations: number of full passes over train_data
        learning_rate: step size applied to the averaged gradient of each batch

        Raises ValueError if batch_size is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        print("Starting Training", end='\n\n')
        for i in range(iterations):
            print(f"\n\n\nStarting Iteration {i}\n\n\n")

            # Shuffle a copy so the caller's list is not reordered as a side effect
            shuffled = list(train_data)
            random.shuffle(shuffled)

            # For each batch of (batch_size) samples, do backprop, update all parameters and
            # report the average cost of the batch
            for counter, start in enumerate(range(0, len(shuffled), batch_size), start=1):
                mini_batch = shuffled[start:start + batch_size]
                print(f"Batch Number {counter}")
                avgCost = self.update_parameters(mini_batch, learning_rate)
                print(f"Cost: {avgCost}")

    def update_parameters(self, batch, learning_rate=0.1):
        """Update the weights and biases from one mini batch and return its average cost.

        batch: list of tuples, each containing the actual value of the image and a list of
               pixels representing the image
        learning_rate: step size applied to the gradient averaged over the batch

        Returns the average cost over the batch as a float (0.0 for an empty batch, in which
        case no parameter is changed).
        """
        if len(batch) == 0:
            return 0.0

        # Running sums of the per-sample shifts for every bias column and weight matrix
        total_delta_bias = [np.zeros(bias.shape) for bias in self.biases]
        total_delta_weight = [np.zeros(weight.shape) for weight in self.weights]
        total_cost = 0.0

        for actualValue, pixels in batch:
            delta_bias, delta_weight = self.backprop(actualValue, pixels)

            # backprop just ran a forward pass, so sig_activations[-1] is this sample's output.
            # np.sum collapses the helper's result to a single number whatever its shape.
            total_cost += float(np.sum(costFunction(self.sig_activations[-1], actualValue)))

            total_delta_bias = [total + delta for total, delta in zip(total_delta_bias, delta_bias)]
            total_delta_weight = [total + delta for total, delta in zip(total_delta_weight, delta_weight)]

        # backprop returns shifts built from costFunctionPrime (desired - actual), i.e. they already
        # point in the cost-decreasing direction, so they are added. Both weights and biases are
        # averaged over the number of samples in the batch.
        step = learning_rate / len(batch)
        self.weights = [weight + step * delta for weight, delta in zip(self.weights, total_delta_weight)]
        self.biases = [bias + step * delta for bias, delta in zip(self.biases, total_delta_bias)]

        return total_cost / len(batch)

    def backprop(self, label, pixels):
        """Backpropagate one sample.

        label: the actual value represented in the image
        pixels: the list of pixels representing the image

        Returns (delta_bias, delta_weight): two lists shaped like self.biases and self.weights
        holding the shift this sample asks for in every bias and weight.
        """
        # Calculate the values of each layer for the image
        self.calculateOutputLayer(pixels)

        # Every slot is assigned below: index -1 here, indices -2 .. -(num_layers - 1) in the loop
        delta_bias = [None] * len(self.biases)
        delta_weight = [None] * len(self.weights)

        # Derivative terms of the cost function and the sigmoid at the output layer
        costPrime = costFunctionPrime(self.sig_activations[-1], label)
        sigPrime = sigmoidPrime(self.activations[-1])

        # delta is the error term of the current layer, shared by its bias and weight shifts
        delta = costPrime * sigPrime
        delta_bias[-1] = delta
        delta_weight[-1] = np.dot(delta, self.sig_activations[-2].T)

        # Walk backwards from the last hidden layer towards the input
        for layer in range(2, self.num_layers):
            # Push the error back through the TRANSPOSED weights of the following layer.
            # (A reshape to the transposed shape would keep the element order and scramble
            # which weight multiplies which error component.)
            delta = np.dot(self.weights[-layer + 1].T, delta) * sigmoidPrime(self.activations[-layer])

            delta_bias[-layer] = delta
            delta_weight[-layer] = np.dot(delta, self.sig_activations[-layer - 1].T)

        return (delta_bias, delta_weight)

    def accuracy(self, inputs):
        """Print and return the fraction of inputs classified correctly, rounded to 2 decimals.

        inputs: list of (actual value, [list of pixels]) tuples
        Returns 0.0 when inputs is empty.
        """
        totalInputs = len(inputs)
        totalRight = sum(self.isCorrect(value, pixels) for value, pixels in inputs)

        if totalInputs:
            percentAcc = float('{:.2f}'.format(totalRight / totalInputs))
        else:
            percentAcc = 0.0

        print(f"Total Accuracy: {percentAcc}")
        return percentAcc

    def isCorrect(self, value, pixels):
        """Return 1 if the strongest output neuron's index equals value, otherwise 0."""
        output = self.calculateOutputLayer(pixels)
        predicted = np.argmax(output)
        return 1 if predicted == value else 0


# Function to calculate the cost of an output layer (module-level helper, not a method)
def costFunction(outputLayer, desiredOutput):
    """Sum of squared differences between the output layer and its target.

    The target is 1 at index desiredOutput and 0 at every other index.

    outputLayer: sequence of output values, indexed from 0
    desiredOutput: index whose target value is 1
    """
    total = 0
    for index, activation in enumerate(outputLayer):
        target = 1 if index == desiredOutput else 0
        total += (target - activation) ** 2
    return total

def costFunctionPrime(outputLayer, desiredOutput):
    """Return (target - output) for every entry of the output layer as a numpy array.

    The target is 1 at index desiredOutput and 0 at every other index. Note the sign:
    the result is target minus output, not output minus target.

    outputLayer: sequence of output values, indexed from 0
    desiredOutput: index whose target value is 1
    """
    differences = [
        (1 if index == desiredOutput else 0) - activation
        for index, activation in enumerate(outputLayer)
    ]
    return np.array(differences)

# The sigmoid function
def sigmoid(activation):
    """Logistic function 1 / (1 + e^(-activation)), applied element-wise to numpy input."""
    exp_negative = np.exp(-activation)
    return 1 / (exp_negative + 1)

# The derivative of the sigmoid function
def sigmoidPrime(activation):
    """Derivative of the sigmoid function: sigmoid(x) * (1 - sigmoid(x))."""
    sig = sigmoid(activation)
    return sig * (1 - sig)

def sigmoidPrimeList(activation_list):
    """Apply sigmoidPrime to every element of activation_list and return the results as a list."""
    return list(map(sigmoidPrime, activation_list))



In [None]:
# Three layers: 784 inputs, 128 hidden neurons, 10 outputs
# (presumably one input per image pixel and one output per digit -- confirm against train.csv)
networkTest = BasicNetwork([784, 128, 10])

# Train in mini batches of 100 samples for 10 passes over the training data
networkTest.startTrain(train_data, batch_size=100, iterations=10)

# Report accuracy on the held-out samples
networkTest.accuracy(test_data)