# Workshop 2 Answer Key

In [1]:
import numpy as np

def get_data(path):
    """
    Load an MNIST-style CSV file into an image array and a one-hot label array.

    Each non-blank line is expected to look like "label,p0,p1,...,p783": the text before the first comma
    is the integer label, and the rest of the line is 784 comma-separated integer pixel values.

    Parameters:
    path: path to the CSV file to read

    Returns:
    A tuple (images, labels). images has shape (number of rows, 784) and holds the pixel values divided
    by 255. labels has shape (number of rows, 10) and holds a single 1.0 in each row.
    """

    # The context manager closes the file even if parsing fails part-way through.
    with open(path, 'r') as f:
        stripped = [line.strip() for line in f]

    # Ignore blank lines (for example a trailing newline at the end of the file) so they neither crash
    # the label parsing nor leave all-zero rows in the returned arrays.
    lines = [line for line in stripped if line]

    training_images = np.zeros((len(lines), 784))
    training_labels = np.zeros((len(lines), 10))
    for index, line in enumerate(lines):
        # Split on the first comma only: the label on the left, all of the pixel values on the right.
        label_text, _, pixel_text = line.partition(',')
        label = int(label_text)
        training_images[index, :] = np.fromstring(pixel_text, dtype=int, sep=',')
        # NOTE(review): with label - 1, a label of 0 wraps around to the last column (index -1) and a
        # label of d > 0 lands in column d - 1. Confirm this shifted encoding is intended.
        training_labels[index, label - 1] = 1.0

    return training_images / 255, training_labels

# Load the dataset once so the cells below can index into it.
# NOTE(review): the file read here is mnist_test.csv, yet the results are bound to training_* names.
# Confirm which data split is intended.
training_images, training_labels = get_data("../data/mnist_test.csv")

<img src="pics/FCN_close.png" width="600">

In [2]:
class NeuralNetwork():
    """
    A Fully Connected Neural Network. There are 784 input layer nodes, 12 hidden layer nodes, and 10 output layer
    nodes.

    Hidden nodes use a tanh activation and output nodes use a sigmoid activation. Weights and biases are drawn
    from a random generator with a fixed seed, so every new instance starts with the same values.
    """
    def __init__(self):

        # Helpful constants
        self.input_layer_size = 784
        self.hidden_layer_size = 12
        self.output_layer_size = 10

        # We need to create arrays to store all of the parts of our network:

        # Arrays to hold node values
        self.N = np.zeros((self.input_layer_size, ))
        self.H = np.zeros((self.hidden_layer_size, ))
        self.Z = np.zeros((self.output_layer_size, ))

        # Set random seed so values are predictable
        rng = np.random.default_rng(888)

        # Arrays to hold weight values (randomly initialized between -1 and 1).
        # The order of the rng calls (W, V, B, C) determines the values, so do not reorder them.
        self.W = 2 * rng.random((self.input_layer_size, self.hidden_layer_size)) - 1
        self.V = 2 * rng.random((self.hidden_layer_size, self.output_layer_size)) - 1

        # Arrays to hold biases for hidden and output nodes. Sized from the layer constants so they
        # always agree with the weight arrays above.
        self.B = 2 * rng.random(self.hidden_layer_size) - 1
        self.C = 2 * rng.random(self.output_layer_size) - 1


    def fill_input_nodes(self, x):
        """
        Given an image vector, fill self.N, the input node array. Remember, we just put values from the image
        into the input nodes, so this should be very easy.

        Parameters:
        x: input vector representing image data, one-dimensional vector
        """

        # self.N refers to the same array object as x; no copy is made.
        self.N = x


    def calculate_hi(self, i):
        """
        Assuming the input nodes array is full, use these values to calculate hi, the ith hidden layer node.
        Once the value is calculated, fill the corresponding entry in self.H.
        You will need to access weight array W and bias array B.

        Parameters:
        i: the index telling which hidden layer node to calculate
        """

        h = self.N @ self.W[:, i] + self.B[i]
        h = np.tanh(h)  # Applying the tanh activation function
        self.H[i] = h

        # Some of you used a for loop to multiply values from the input layer by the appropriate weights.
        # While that is an acceptable solution, it is faster to use a numpy dot product (numpy operations
        # are faster than python loops).


    def fill_hidden_nodes(self):
        """
        Use the calculate_hi method to iteratively fill every hidden layer node. This should be easy if your
        calculate_hi method works.

        For thought:
        Finding each hi value individually is a perfectly acceptable way of calculating node values,
        but is it the most efficient? Is there a way to calculate all hi values at once?
        """

        for i in range(self.hidden_layer_size):
            self.calculate_hi(i)


    def calculate_zi(self, i):
        """
        Assuming the hidden nodes array is full, use these values to calculate zi, the ith output layer node.
        Once the value is calculated, fill the corresponding entry in self.Z.
        You will need to access weight array V and bias array C.

        Parameters:
        i: the index telling which output layer node to calculate
        """

        z = self.H @ self.V[:, i] + self.C[i]
        z = 1 / (1 + np.exp(-1 * z))  # Applying the sigmoid activation function
        self.Z[i] = z


    def fill_output_nodes(self):
        """
        Use the calculate_zi method to iteratively fill every output layer node. This should be easy if your
        calculate_zi method works.
        """

        for i in range(self.output_layer_size):
            self.calculate_zi(i)


    def forward(self, x):
        """
        Given an image vector, fill every node in the network. You have already written the necessary methods to
        complete this task, you just need to call them in the right order.

        The result is stored in self.Z; nothing is returned.

        Parameters:
        x: input vector representing image data, one-dimensional vector
        """

        self.fill_input_nodes(x)
        self.fill_hidden_nodes()
        self.fill_output_nodes()



    ### Challenge 1
    # You've managed to complete the forward pass. However, the way we implemented the forward pass just now was
    # inefficient. There is actually a way to calculate values layer-by-layer instead of node-by-node. Can you
    # think of a formula that can calculate all hidden nodes at once, instead of one-at-a-time?
    #
    # Hint: you will need to do matrix operations between the entire weight array, input array, and bias array
    #       instead of just a column or element

    def fill_hidden_nodes_fast(self):
        """
        Assume the input nodes array is filled. Now, use these values to fill all hidden nodes at once.
        You will need to use self.N, self.W, and self.B.
        This should only take up a few lines
        """

        # One vector-matrix product computes the weighted sum for every hidden node at the same time.
        H = self.N @ self.W + self.B
        self.H = np.tanh(H)


    def fill_output_nodes_fast(self):
        """
        Assume the hidden nodes array is filled. Now, use these values to fill all output nodes at once.
        You will need to use self.H, self.V, and self.C.
        This should only take up a few lines.
        """

        Z = self.H @ self.V + self.C
        self.Z = 1 / (1 + np.exp(-1 * Z))  # Applying the sigmoid activation function to every output node


    def forward_fast(self, x):
        """
        Given an image vector, fill every node in the network using the more efficient methods you just wrote.

        The result is stored in self.Z; nothing is returned.

        Parameters:
        x: input vector representing image data, one-dimensional vector
        """

        self.fill_input_nodes(x)
        self.fill_hidden_nodes_fast()
        self.fill_output_nodes_fast()




    ### Challenge 2
    # Ok, so you've completed forward pass for real this time. But what do we do with this information?
    # If we want our network to learn anything, we'll need to use the outputs from forward pass to generate
    # a loss, or error, that measures how far away from the target our network was.
    #
    # The process is simple: forward pass an image through the network, then read the output.
    # Compare this output to the label corresponding to that image. These will both be one-dimensional
    # arrays of the same size. The most simple loss function is just calculating the distance between these
    # two vectors.

    def calculate_loss(self, x, y):
        """
        Given an image vector and its corresponding label vector, calculate the loss.

        The loss is the sum of squared differences between the network output self.Z and the label y
        (the squared distance between the two vectors). The value is printed and also returned.

        Parameters:
        x: input vector representing image data, one-dimensional vector
        y: input vector representing label, one-dimensional vector. Has a 1 in the position corresponding to the
           correct answer, and 0s everywhere else.

        Returns:
        The loss, a scalar value.
        """

        # forward_fast returns nothing; it leaves the network output in self.Z.
        self.forward_fast(x)
        loss = np.sum((self.Z - y) ** 2)
        print(loss) # we still print the loss so existing notebook output is unchanged
        return loss # returned as well so callers can store it, e.g. for gradient descent later
        


Test your code here in the space below. Feel free to make as many additional cells as you wish.

In [5]:
# get the first image/label pair from the loaded dataset to use as a test sample
sample_image = training_images[0]
sample_label = training_labels[0]

# create a NeuralNetwork object and test the .forward() method
net = NeuralNetwork()
net.forward(sample_image)
print(net.Z) # this should be a one-dimensional array of length 10. The values are arbitrary and meaningless
             # for now because the weights are randomly initialized and we have not done any backpropagation
    
print("\n")
    
# test .forward_fast(). The output should be exactly the same as from the .forward() method, because both
# methods compute the same values from the same weights and biases.
net.forward_fast(sample_image)
print(net.Z)

print("\n")

# test .calculate_loss(). This should print the loss, a scalar value
loss = net.calculate_loss(sample_image, sample_label)

[0.77599697 0.60690992 0.65319114 0.24815725 0.06235111 0.94170779
 0.96151447 0.96085632 0.51771702 0.11653431]


[0.77599697 0.60690992 0.65319114 0.24815725 0.06235111 0.94170779
 0.96151447 0.96085632 0.51771702 0.11653431]


3.5557899910144712
