 # Assignemnt 3

### Loading MNIST dataset 

In [18]:
import numpy as np
from torchvision.datasets import MNIST

def download_mnist(is_train: bool):
    dataset = MNIST(root='./data',transform=lambda x: np.array(x).flatten(),download=True,train=is_train)
    mnist_data = []
    mnist_labels = []
    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)
    return np.array(mnist_data), np.array(mnist_labels)
train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)

In [19]:
def encode_labels(labels):
    encoded_labels = np.zeros((len(labels), 10))
    for i, label in enumerate(labels):
        encoded_labels[i][label] = 1
    return encoded_labels

train_Y = encode_labels(train_Y)
test_Y = encode_labels(test_Y)

In [20]:
def normalize_data(data):
    return data / 255

train_X = normalize_data(train_X)
test_X = normalize_data(test_X)

In [21]:

print(train_Y.shape)

print(test_X.shape)
print(test_Y.shape)


(60000, 10)
(10000, 784)
(10000, 10)


### Training process

In [23]:
def split_into_batches(data, labels, batch_size):
    batches = []
    permutatuion = np.random.permutation(data.shape[0])
    data_shuffled = data[permutatuion]
    labels_shuffled = labels[permutatuion]
    for i in range(0, data.shape[0], batch_size):
        batches.append((data_shuffled[i:i+batch_size], labels_shuffled[i:i+batch_size]))
    return batches

In [64]:
def sigmoid(x):
    clipped = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-clipped))

In [63]:
def sigmoid_prime(x):
    s = sigmoid(x)
    return s * (1 - s)

In [26]:
def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))  
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)


In [98]:
def forward_propagation(X, W, b):
    activations = [X]
    zs=[]
    for w,b in zip(W, b):
        z = np.dot(activations[-1], w) + b
        zs.append(z)
        activations.append(sigmoid(z))
    activations[-1] = softmax(zs[-1])
    return activations, zs

In [97]:
def backward_propagation(W, b, t, activations, zs):
   batch_size = t.shape[0]
   gradient_b = [np.zeros(b.shape) for b in b]
   gradient_W = [np.zeros(w.shape) for w in W]
   error = activations[-1] - t
   for layer in range(len(W) - 1, -1, -1):
       gradient_b[layer] = np.mean(error, axis=0, keepdims=True)
       gradient_W[layer] = np.dot(activations[layer].T, error) / batch_size
       if layer > 0:  
            error = np.dot(error, W[layer].T) * sigmoid_prime(zs[layer-1])
   return gradient_W, gradient_b

In [29]:
def gradient_descent(X, y_true, y_pred, W, b, learning_rate):
    size = X.shape[0] 
    dW = np.dot(X.T, (y_pred - y_true)) / size
    db = np.sum(y_pred - y_true, axis=0) / size
    
    W -= learning_rate * dW
    b -= learning_rate * db
    return W, b

In [99]:
def train_batch(batch,batch_labels,weights,biases,learning_rate):
      activations, zs = forward_propagation(batch, weights, biases)
      gradient_W, gradient_b = backward_propagation(weights, biases, batch_labels, activations, zs)
      for i in range(len(weights)):
          weights[i], biases[i] = weights[i] - learning_rate * gradient_W[i], biases[i] - learning_rate * gradient_b[i]
      return weights, biases

In [52]:
def initialize_layers(input_size, hidden_layers, output_size):
    weights = []
    biases = []
    layers = [input_size] + hidden_layers + [output_size]
  
    for i in range(1, len(layers)):
        W=np.random.randn(layers[i-1], layers[i]) * 0.01
        b = np.zeros((1, layers[i]))
        weights.append(W)
        biases.append(b)
    return weights, biases



### Testing process

In [91]:
def test(data,labels,weights,biases):
    activations,zs = forward_propagation(data, weights, biases)
    predictions = np.argmax(activations[-1], axis=1)
    true_labels = np.argmax(labels, axis=1)
    accuracy = np.mean(predictions == true_labels)
    return accuracy
   

### Solution

In [84]:
import os
def save_model(weights, biases,epochs):
    os.makedirs(f'models/epochs_{epochs}', exist_ok=True)
    for layer in range(len(weights)):
        np.save(f'models/epochs_{epochs}/weights_{layer+1}.npy', weights[layer])
        np.save(f'models/epochs_{epochs}/biases_{layer+1}.npy', biases[layer])

def load_model(epochs,layers):
    weights = []
    biases = []
    for i in range(layers):
        weights.append(np.load(f'models/epochs_{epochs}/weights_{i+1}.npy'))
        biases.append(np.load(f'models/epochs_{epochs}/biases_{i+1}.npy'))
    return weights, biases

In [95]:
from time import time

def train(training_data,labels,hidden_layers,learning_rate,epochs,batch_size,accuracyFunction):
    weights, biases = initialize_layers(training_data.shape[1], hidden_layers, labels.shape[1])
    start = time()
    for epoch in range(epochs):
        batches = split_into_batches(training_data, labels, batch_size)
        for batch, batch_labels in batches:
            weights, biases = train_batch(batch, batch_labels, weights, biases, learning_rate)
        if (epoch+1) % 50 == 0:
            duration = time() - start
            save_model(weights, biases, epoch+1)
            accuracy = accuracyFunction(weights, biases) *100
            print(f'Epoch {epoch+1}/{epochs} - Accuracy: {accuracy:.2f}% | Duration: {duration:.2f}s')
            
    return weights, biases

In [100]:
train(train_X,train_Y,[100],0.01,100,100, lambda w, b: test(test_X, test_Y, w, b))

Epoch 50/100 - Accuracy: 91.72% | Duration: 339.78s
Epoch 100/100 - Accuracy: 93.39% | Duration: 721.35s


([array([[ 0.00955805, -0.01843738, -0.00191704, ...,  0.01280728,
          -0.01772529,  0.0220062 ],
         [-0.0006264 ,  0.01566429, -0.00344191, ...,  0.00476574,
          -0.01069859,  0.0252341 ],
         [-0.00317104, -0.00920206,  0.00280177, ..., -0.00702375,
          -0.00843816, -0.007645  ],
         ...,
         [ 0.00449444, -0.0035135 , -0.00932747, ..., -0.0100698 ,
          -0.01969841, -0.01515436],
         [-0.00174201, -0.00828969, -0.01298769, ..., -0.00349879,
          -0.00572966,  0.00343889],
         [-0.00958202, -0.00535816,  0.00674661, ..., -0.00986339,
           0.00430376,  0.00653739]]),
  array([[-2.81777536e-01,  3.86455750e-01,  3.98871767e-01,
           3.36974216e-01, -3.84425236e-01,  6.67146847e-02,
          -2.21469697e-01,  3.71612528e-01, -5.08712218e-01,
          -1.64676026e-01],
         [-5.49520159e-01,  4.25418487e-01, -2.80190333e-01,
           6.66078232e-01, -4.11270988e-01,  3.36804822e-01,
          -4.21475873e-01, 