### Assignment 2 - Machine Learning


In [None]:
import time
import matplotlib.pyplot as plt
import pickle
import gzip
import numpy as np
import random

In [None]:
# %load mnist_loader.py
# The magic above is kept only as a provenance note: the file's contents are
# already pasted into this cell, and leaving the magic live makes the cell
# fail whenever mnist_loader.py is not next to the notebook.
"""
mnist_loader
~~~~~~~~~~~~
A library to load the MNIST image data.  For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``.  In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""

#### Libraries
# Standard library
import pickle
import gzip

# Third-party libraries
import numpy as np

def load_data(path='mnist.pkl.gz'):
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    Parameters
    ----------
    path : str, optional
        Location of the gzip-compressed pickle to read.  Defaults to
        ``'mnist.pkl.gz'`` in the current working directory.

    Returns
    -------
    tuple
        ``(training_data, validation_data, test_data)`` exactly as stored
        in the archive.

    The ``training_data`` is expected to be a tuple with two entries.
    The first entry contains the actual training images: a numpy ndarray
    with 50,000 entries, each of which is a numpy ndarray with 784 values
    representing the 28 * 28 = 784 pixels of a single MNIST image.
    The second entry is a numpy ndarray containing 50,000 entries, the
    digit values (0...9) for the corresponding images.
    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    """
    # SECURITY: unpickling can execute arbitrary code -- only open archives
    # that come from a trusted source.
    # The ``with`` block closes the gzip handle even if unpickling fails.
    with gzip.open(path, 'rb') as f:
        # latin1 lets Python 3 read a pickle written by Python 2.
        training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
    return (training_data, validation_data, test_data)

def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    ``training_data`` is a list of 2-tuples ``(x, y)``.  ``x`` is the
    input image reshaped to a ``(784, 1)`` numpy.ndarray and ``y`` is the
    ``(10, 1)`` unit vector produced by ``vectorized_result`` for the
    correct digit.

    ``validation_data`` and ``test_data`` are lists of 2-tuples
    ``(x, y)`` where ``x`` is again a ``(784, 1)`` numpy.ndarray and
    ``y`` is the label as stored in the archive (the digit value).

    This means the training data and the validation / test data use
    slightly different target formats.

    All three results are real lists, so they can be iterated more than
    once and support ``len()``; a bare ``zip`` object would be exhausted
    after the first pass.
    """
    tr_d, va_d, te_d = load_data()

    def as_columns(images):
        # Each flat image becomes a (784, 1) column vector.
        return [np.reshape(x, (784, 1)) for x in images]

    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = list(zip(as_columns(tr_d[0]), training_results))
    validation_data = list(zip(as_columns(va_d[0]), va_d[1]))
    test_data = list(zip(as_columns(te_d[0]), te_d[1]))
    return (training_data, validation_data, test_data)

def vectorized_result(j, num_classes=10):
    """Return a ``(num_classes, 1)`` unit column vector with a 1.0 in the
    jth position and zeroes elsewhere.

    This is used to convert a digit (0...9) into the corresponding
    desired output of the neural network.

    Parameters
    ----------
    j : int
        Index of the entry to set to 1.0.
    num_classes : int, optional
        Length of the vector.  Defaults to 10, the number of digits.

    Raises
    ------
    IndexError
        If ``j`` is outside the vector.
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e


ValueError: 'mnist_loader.py' was not found in history, as a file, url, nor in the user namespace.

In [None]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` using
    ``np.logaddexp``, which evaluates ``log(exp(0) + exp(-x))`` without
    forming ``exp(-x)`` directly.  The naive formula overflows (and emits a
    RuntimeWarning) for large negative ``x``; this form does not.

    Parameters
    ----------
    x : scalar or numpy.ndarray

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in ``[0, 1]`` with the same shape as ``x``.
    """
    return np.exp(-np.logaddexp(0, -x))

In [None]:

class NeuralNetwork:
    """Fully connected feed-forward network with sigmoid activations.

    ``self.weights`` holds one matrix per layer, each of shape
    ``(n_out, n_in + 1)``.  Column 0 is the bias: every layer input gets a
    constant 1 stacked on top before the matrix product.  Inputs, targets
    and activations are column vectors of shape ``(n, 1)``.

    All training methods perform gradient descent on the squared error
    ``0.5 * sum((output - target) ** 2)`` and shuffle the examples once per
    epoch with the standard ``random`` module.
    """

    def __init__(self, num_neurons=16, num_layers=2, input_size=784, output_size=10):
        '''Initialize the NeuralNetwork with specified parameters.

        num_neurons -- width of every hidden layer
        num_layers  -- number of weight matrices; values below 2 still
                       produce the minimum of 2 (one hidden, one output)
        input_size  -- length of an input vector (without the bias entry)
        output_size -- length of the output vector

        Weights are drawn from a normal distribution with mean 0 and
        standard deviation ``1 / sqrt(input_size * output_size)``.
        '''
        hidden_count = 1 + max(num_layers - 2, 0)
        layer_sizes = [input_size] + [num_neurons] * hidden_count + [output_size]
        scale = 1 / np.sqrt(input_size * output_size)

        # The "+ 1" column stores the bias weight of each neuron.
        self.weights = [
            np.random.normal(0, scale, (fan_out, fan_in + 1))
            for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:])
        ]

    # ------------------------------------------------------------------
    # Shared machinery
    # ------------------------------------------------------------------
    def _trace(self, biased_input):
        '''Forward pass that records what backpropagation needs.

        ``biased_input`` already carries the leading bias entry.  Returns
        ``(layer_inputs, activations)``: for layer ``k``, ``layer_inputs[k]``
        is the bias-extended vector fed into ``self.weights[k]`` and
        ``activations[k]`` is that layer's sigmoid output.
        '''
        layer_inputs = []
        activations = []
        current = biased_input
        for weight_matrix in self.weights:
            layer_inputs.append(current)
            activations.append(sigmoid(weight_matrix.dot(current)))
            current = np.vstack(([1], activations[-1]))
        return layer_inputs, activations

    def _gradients(self, layer_inputs, activations, target):
        '''Backpropagate one example; returns one gradient per weight matrix,
        in the same order as ``self.weights``.'''
        gradients = [None] * len(self.weights)

        # Output layer: d(error)/d(net) for squared error + sigmoid.
        delta = (activations[-1] - target) * activations[-1] * (1 - activations[-1])
        gradients[-1] = delta.dot(layer_inputs[-1].T)

        for layer in range(len(self.weights) - 2, -1, -1):
            # [1:] drops the row belonging to the bias input, which has no
            # neuron behind it to propagate the error to.
            propagated = self.weights[layer + 1].T.dot(delta)[1:]
            delta = propagated * activations[layer] * (1 - activations[layer])
            gradients[layer] = delta.dot(layer_inputs[layer].T)

        return gradients

    def _example_gradients(self, pixels, target):
        '''Gradients of the error for a single ``(pixels, target)`` pair.'''
        layer_inputs, activations = self._trace(np.vstack(([1], pixels)))
        return self._gradients(layer_inputs, activations, target)

    def _online_pass(self, inputs, targets, order, rate):
        '''One epoch of per-example updates, visiting indices in ``order``.'''
        for index in order:
            gradients = self._example_gradients(inputs[index], targets[index])
            for weight_matrix, gradient in zip(self.weights, gradients):
                weight_matrix -= rate * gradient

    # ------------------------------------------------------------------
    # Inference
    # ------------------------------------------------------------------
    def match(self, pixels):
        '''Returns predicted number for the given input pixels
        (index of the largest output activation).'''
        return np.argmax(self.fit(pixels))

    def forward_pass(self, input_vector):
        '''Performs a forward pass through the neural network.

        ``input_vector`` must already include the leading bias entry.
        Returns the activation vector of the output layer.
        '''
        return self._trace(input_vector)[1][-1]

    def fit(self, pixels):
        '''Returns the output activation vector for an input vector of
        pixels (no bias entry).  Despite the name, this does not train.'''
        return self.forward_pass(np.vstack(([1], pixels)))

    def calculate_accuracy(self, input_list, target_list):
        '''Fraction of inputs whose predicted digit equals the argmax of the
        corresponding target vector.'''
        correct_predictions = sum(
            self.match(input_data) == np.argmax(target_data)
            for input_data, target_data in zip(input_list, target_list)
        )
        return correct_predictions / len(input_list)

    def test_predictions(self, input_list, target_list):
        '''Fraction of inputs whose predicted digit equals the corresponding
        target value (targets are plain labels, not vectors).'''
        correct_predictions = sum(
            self.match(input_data) == target_data
            for input_data, target_data in zip(input_list, target_list)
        )
        return correct_predictions / len(input_list)

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------
    def train_epochs(self, input_data, target_data, learning_rate=0.1, num_epochs=100):
        '''Trains the neural network using online learning for a specified
        number of epochs.  Works for any number of layers.

        Prints ``Epoch <k>`` after each completed epoch.
        '''
        # Shuffling an index list in place yields the same visiting order as
        # re-shuffling the paired data each epoch, without copying the data.
        order = list(range(len(input_data)))

        for epoch in range(num_epochs):
            random.shuffle(order)
            self._online_pass(input_data, target_data, order, learning_rate)
            print('Epoch', epoch + 1)

    def train_batch(self, input_data, target_data, learning_rate=0.1, num_epochs=10, batch_size=1):
        '''Updates weights using mini-batch learning.  Works for any number
        of layers.

        Gradients are averaged over each batch before the update.  A final
        batch smaller than ``batch_size`` is still used (averaged over its
        actual size) instead of being dropped.

        Prints ``Epoch <k>`` at the start of each epoch.
        Raises ValueError if ``batch_size`` is smaller than 1.
        '''
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')

        num_examples = len(input_data)
        order = list(range(num_examples))

        for epoch in range(num_epochs):
            random.shuffle(order)
            print('Epoch', epoch + 1)

            for start in range(0, num_examples, batch_size):
                batch = order[start:start + batch_size]
                totals = [np.zeros_like(weight_matrix) for weight_matrix in self.weights]

                # Weights stay fixed while the batch gradients are collected.
                for index in batch:
                    gradients = self._example_gradients(input_data[index], target_data[index])
                    for total, gradient in zip(totals, gradients):
                        total += gradient

                for weight_matrix, total in zip(self.weights, totals):
                    weight_matrix -= learning_rate * total / len(batch)

    def online_multi_layer(self, input_list, result_list, c=0.1, n=100):
        '''Online learning for case with any number of layers.

        ``c`` is the learning rate and ``n`` the number of epochs.
        Prints ``Epoch <k>`` at the start of each epoch.
        '''
        order = list(range(len(input_list)))

        for epoch in range(n):
            print('Epoch', epoch + 1)
            random.shuffle(order)
            self._online_pass(input_list, result_list, order, c)

In [None]:
# Load MNIST, then unzip each sequence of (input, label) pairs into two
# parallel tuples: one of inputs, one of targets.
tr_d, va_d, te_d = load_data_wrapper()
input_list, result_list = zip(*tr_d)
input_test, result_test = zip(*te_d)

FileNotFoundError: [Errno 2] No such file or directory: 'mnist.pkl.gz'

In [None]:
# Seed both generators so the run is reproducible under Restart & Run All:
# NeuralNetwork draws its initial weights from np.random and shuffles the
# training examples with the standard ``random`` module.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Create our network: 16 hidden neurons, 2 layers, 784 inputs, 10 outputs
network1 = NeuralNetwork(16, 2, 784, 10)

In [None]:
# Online (per-example) training of the 2-layer network.
# ``epochs`` is reused by the batch-training cell below.
epochs = 10

network1.train_epochs(input_list, result_list, learning_rate=0.1, num_epochs=epochs)

# Training targets are one-hot vectors, test targets are plain labels,
# hence the two different scoring methods.
print('Train data accuracy', network1.calculate_accuracy(input_list, result_list))
print('Test data accuracy', network1.test_predictions(input_test, result_test))

Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Train data accuracy 0.9647
Test data accuracy 0.9442


In [None]:
# Mini-batch training of a fresh 2-layer network (batches of 10 examples),
# using the ``epochs`` value set in the online-training cell.
network2 = NeuralNetwork(16, 2, 784, 10)

network2.train_batch(
    input_list,
    result_list,
    learning_rate=0.1,
    num_epochs=epochs,
    batch_size=10,
)

print('Train data accuracy', network2.calculate_accuracy(input_list, result_list))
print('Test data accuracy', network2.test_predictions(input_test, result_test))

Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Train data accuracy 0.9285
Test data accuracy 0.9298


In [None]:
# Online training of a 3-layer network: learning rate 0.1, 10 epochs.
network_3layers = NeuralNetwork(16, 3, 784, 10)
network_3layers.online_multi_layer(input_list, result_list, c=0.1, n=10)

print('Train data accuracy', network_3layers.calculate_accuracy(input_list, result_list))
print('Test data accuracy', network_3layers.test_predictions(input_test, result_test))

NameError: name 'input_list' is not defined