 # Assignment 2

### Loading MNIST dataset 

In [2]:
import numpy as np
from torchvision.datasets import MNIST

def download_mnist(is_train: bool):
    """Load one MNIST split as a pair of NumPy arrays.

    The dataset is stored under ``./data`` (downloaded if necessary) and every
    image is turned into a flat NumPy vector by the dataset transform.

    Args:
        is_train: True selects the training split, False the test split.

    Returns:
        An ``(images, labels)`` tuple of NumPy arrays, in dataset iteration order.
    """
    def to_flat_array(image):
        # Convert the sample to an array and collapse it to one dimension.
        return np.array(image).flatten()

    dataset = MNIST(root='./data', train=is_train, transform=to_flat_array, download=True)
    samples = list(dataset)
    images = np.array([image for image, _ in samples])
    targets = np.array([target for _, target in samples])
    return images, targets
# Materialise both MNIST splits as (flattened images, labels) array pairs.
train_X, train_Y = download_mnist(is_train=True)
test_X, test_Y = download_mnist(is_train=False)

In [3]:
def encode_labels(labels, num_classes=10):
    """One-hot encode a 1-D sequence of integer class labels.

    Args:
        labels: 1-D sequence or array of integer class indices.
        num_classes: Width of the encoding, i.e. number of distinct classes.
            Defaults to 10 (the MNIST digits).

    Returns:
        A float64 array of shape ``(len(labels), num_classes)`` with a single 1
        per row at the column given by that row's label.

    Raises:
        ValueError: If ``labels`` is not one-dimensional (for example when an
            already one-hot-encoded matrix is passed in again).
        IndexError: If a label lies outside the valid index range.
    """
    label_indices = np.asarray(labels, dtype=np.intp)
    if label_indices.ndim != 1:
        raise ValueError(
            f'labels must be one-dimensional, got array with shape {label_indices.shape}'
        )
    encoded_labels = np.zeros((label_indices.shape[0], num_classes))
    # Vectorised assignment: one (row, label) coordinate per sample.
    encoded_labels[np.arange(label_indices.shape[0]), label_indices] = 1
    return encoded_labels

# Replace the integer labels of both splits with their one-hot encodings.
# NOTE: run this cell once per kernel session - re-running it would feed the
# already-encoded arrays back into encode_labels, which expects integer labels.
train_Y = encode_labels(labels=train_Y)
test_Y = encode_labels(labels=test_Y)

In [4]:
def normalize_data(data, max_value=255):
    """Scale values by dividing them by ``max_value``.

    Args:
        data: Array (or scalar) of values to scale.
        max_value: Divisor used for scaling. Defaults to 255, which maps 8-bit
            pixel intensities to the range [0, 1].

    Returns:
        ``data / max_value``; the input is not modified.

    Raises:
        ValueError: If ``max_value`` is zero.
    """
    if max_value == 0:
        raise ValueError('max_value must be non-zero')
    return data / max_value

# Rescale the pixel values of both splits.
# NOTE: run this cell once per kernel session - re-running it scales the
# already-normalized data a second time.
train_X = normalize_data(data=train_X)
test_X = normalize_data(data=test_X)

In [5]:
# Sanity check of the prepared arrays.
# NOTE(review): the first value printed is one complete sample vector, not a
# shape - possibly `train_X.shape` was intended, matching the other three; confirm.
print(train_X[2], train_Y.shape, sep='\n')

print(test_X.shape, test_Y.shape, sep='\n')


[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

### Training process

In [6]:
def split_into_batches(data, labels, batch_size):
    """Shuffle the samples and cut them into consecutive mini-batches.

    One random permutation is applied to both ``data`` and ``labels`` so every
    sample stays paired with its label.

    Args:
        data: Array whose first axis indexes samples.
        labels: Array aligned with ``data`` along the first axis.
        batch_size: Maximum number of samples per batch; the final batch may be
            smaller.

    Returns:
        A list of ``(data_batch, label_batch)`` tuples.
    """
    sample_count = data.shape[0]
    order = np.random.permutation(sample_count)
    shuffled_data, shuffled_labels = data[order], labels[order]
    return [
        (shuffled_data[start:start + batch_size], shuffled_labels[start:start + batch_size])
        for start in range(0, sample_count, batch_size)
    ]

In [7]:
def softmax(z):
    """Row-wise softmax of a 2-D array of scores.

    The maximum of each row is subtracted before exponentiating so that large
    scores do not overflow; this leaves the result unchanged.

    Args:
        z: 2-D array with one row of scores per sample.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    numerators = np.exp(z - row_max)
    denominators = np.sum(numerators, axis=1, keepdims=True)
    return numerators / denominators


In [8]:
def forward_propagation(X, W, b):
    """Compute class probabilities for a batch with a single linear layer.

    Args:
        X: Input batch, one sample per row.
        W: Weight matrix multiplied with ``X``.
        b: Bias added to the product (broadcast over the rows).

    Returns:
        The row-wise softmax of ``X . W + b``.
    """
    logits = np.dot(X, W)
    logits = logits + b
    return softmax(logits)

In [9]:
def gradient_descent(X, y_true, y_pred, W, b, learning_rate):
    """Perform one gradient-descent step and return the updated parameters.

    The gradients are the batch averages of ``X.T . (y_pred - y_true)`` for the
    weights and of ``y_pred - y_true`` for the bias, which is the gradient of the
    mean cross-entropy loss when ``y_pred`` is a softmax output.

    The inputs ``W`` and ``b`` are left untouched: new arrays are returned
    instead of updating the caller's arrays in place.

    Args:
        X: Input batch, one sample per row.
        y_true: Target matrix aligned with ``y_pred``.
        y_pred: Predicted matrix for the batch.
        W: Current weight matrix.
        b: Current bias vector.
        learning_rate: Step size applied to both gradients.

    Returns:
        A ``(W, b)`` tuple holding the updated weights and bias.
    """
    size = X.shape[0]
    # The error term is shared by both gradients, so compute it only once.
    error = y_pred - y_true
    dW = np.dot(X.T, error) / size
    db = np.sum(error, axis=0) / size

    # Out-of-place updates: no hidden mutation of the caller's arrays, and
    # integer-typed parameters are promoted to float instead of raising.
    updated_W = W - learning_rate * dW
    updated_b = b - learning_rate * db
    return updated_W, updated_b

In [10]:
def train_batch(batch,batch_labels,weights,bias,learning_rate):
    """Run one training step on a single mini-batch.

    Args:
        batch: Input samples of the mini-batch.
        batch_labels: Targets matching ``batch``.
        weights: Current weight matrix.
        bias: Current bias vector.
        learning_rate: Step size for the parameter update.

    Returns:
        The ``(weights, bias)`` pair produced by the gradient-descent step.
    """
    predictions = forward_propagation(batch, weights, bias)
    return gradient_descent(batch, batch_labels, predictions, weights, bias, learning_rate)


In [25]:
# `train` takes an accuracy callback as its sixth argument; pass one that scores
# the model on the held-out test split so each reported checkpoint shows accuracy.
# NOTE: `train` and `test` are defined in later sections of this notebook, so
# those cells must have been executed before this one.
weights, biases = train(train_X, train_Y, 0.01, 100, 100, lambda w, b: test(test_X, test_Y, w, b))



In [30]:
# Persist the trained parameters to .npy files in the working directory.
np.save(file='weights2.npy', arr=weights)
np.save(file='biases2.npy', arr=biases)

### Testing process

In [12]:
def test(data,labels,weights,biases):
    """Measure classification accuracy of the model on a labelled data set.

    Args:
        data: Input samples, one per row.
        labels: One-hot (or score) matrix; the arg-max of each row is taken as
            the true class.
        weights: Weight matrix of the model.
        biases: Bias vector of the model.

    Returns:
        Fraction of samples whose predicted class equals the true class.
    """
    probabilities = forward_propagation(data, weights, biases)
    hits = np.argmax(probabilities, axis=1) == np.argmax(labels, axis=1)
    return np.sum(hits) / data.shape[0]
   

In [29]:
# Accuracy of the trained parameters on the held-out test split.
test(data=test_X, labels=test_Y, weights=weights, biases=biases)



np.float64(0.9158)

### Solution

In [19]:
from time import time

def train(training_data,labels,learning_rate,epochs,batch_size,accuracyFunction=None):
    """Train the linear softmax classifier with mini-batch gradient descent.

    Weights start as uniform random values from ``np.random.rand`` and biases as
    zeros. Each epoch reshuffles the data into mini-batches and applies one
    update per batch. Every 50th epoch a progress line is printed and the
    current parameters are saved under ``models/``.

    Args:
        training_data: 2-D array of inputs, one sample per row.
        labels: 2-D target matrix aligned with ``training_data``; its number of
            columns determines the number of output classes.
        learning_rate: Step size for each gradient-descent update.
        epochs: Number of full passes over the training data.
        batch_size: Number of samples per mini-batch.
        accuracyFunction: Optional callable ``(weights, biases) -> accuracy`` as a
            fraction. When omitted, progress lines do not include an accuracy.

    Returns:
        The ``(weights, biases)`` tuple after the last epoch.
    """
    import os  # local import: only needed to create the checkpoint directory

    checkpoint_dir = 'models'
    checkpoint_interval = 50
    class_count = labels.shape[1]

    weights = np.random.rand(training_data.shape[1], class_count)
    biases = np.zeros(class_count)
    start = time()
    for epoch in range(epochs):
        batches = split_into_batches(training_data, labels, batch_size)
        for batch, batch_labels in batches:
            weights, biases = train_batch(batch, batch_labels, weights, biases, learning_rate)
        if (epoch+1) % checkpoint_interval == 0:
            duration = time() - start
            if accuracyFunction is None:
                print(f'Epoch {epoch+1}/{epochs} | Duration: {duration:.2f}s')
            else:
                accuracy = accuracyFunction(weights, biases) *100
                print(f'Epoch {epoch+1}/{epochs} - Accuracy: {accuracy:.2f}% | Duration: {duration:.2f}s')
            # np.save does not create missing directories, so make sure it exists.
            os.makedirs(checkpoint_dir, exist_ok=True)
            np.save(f'{checkpoint_dir}/weights_{epoch+1}.npy', weights)
            np.save(f'{checkpoint_dir}/biases_{epoch+1}.npy', biases)
    return weights, biases

In [20]:
# Full training run; accuracy at each checkpoint is measured on the test split.
train(
    training_data=train_X,
    labels=train_Y,
    learning_rate=0.01,
    epochs=500,
    batch_size=100,
    accuracyFunction=lambda w, b: test(test_X, test_Y, w, b),
)

Epoch 50/500 - Accuracy: 91.06% | Duration: 34.69s
Epoch 100/500 - Accuracy: 91.65% | Duration: 76.54s
Epoch 150/500 - Accuracy: 91.87% | Duration: 118.79s
Epoch 200/500 - Accuracy: 92.04% | Duration: 160.42s
Epoch 250/500 - Accuracy: 92.22% | Duration: 197.20s
Epoch 300/500 - Accuracy: 92.26% | Duration: 233.60s
Epoch 350/500 - Accuracy: 92.30% | Duration: 268.97s
Epoch 400/500 - Accuracy: 92.31% | Duration: 304.06s
Epoch 450/500 - Accuracy: 92.37% | Duration: 339.47s
Epoch 500/500 - Accuracy: 92.40% | Duration: 374.52s


(array([[0.62648234, 0.9877174 , 0.93342912, ..., 0.6201964 , 0.96479561,
         0.60056878],
        [0.13288015, 0.75012825, 0.77921429, ..., 0.50864505, 0.27296994,
         0.92911452],
        [0.24094357, 0.3005735 , 0.81222496, ..., 0.57551565, 0.16132618,
         0.43317931],
        ...,
        [0.35517075, 0.18327582, 0.85509174, ..., 0.31068536, 0.25400309,
         0.47066609],
        [0.15211833, 0.10667057, 0.24110797, ..., 0.90491558, 0.47958982,
         0.36430446],
        [0.91854232, 0.83102895, 0.83656544, ..., 0.33149008, 0.74249617,
         0.36807191]]),
 array([-0.94600694,  0.61915391,  0.22557197, -0.51574565,  0.12335081,
         2.17994722, -0.3081909 ,  1.23631833, -2.20442952, -0.40996922]))