 # Assignment 2

### Loading MNIST dataset 

In [14]:
import numpy as np
from torchvision.datasets import MNIST

def download_mnist(is_train: bool):
    """Load one MNIST split as a pair of NumPy arrays.

    The dataset is created with ``root='./data'`` and ``download=True``, and
    each image is converted to a flattened NumPy array by the transform.

    Args:
        is_train: Forwarded as ``train=`` to ``MNIST`` to select the split.

    Returns:
        ``(images, labels)`` as NumPy arrays, one entry per dataset sample.
    """
    def flatten_image(picture):
        # Same conversion the dataset applies to every sample on access.
        return np.array(picture).flatten()

    dataset = MNIST(
        root='./data',
        transform=flatten_image,
        download=True,
        train=is_train,
    )
    samples = list(dataset)
    images = [image for image, _ in samples]
    targets = [label for _, label in samples]
    return np.array(images), np.array(targets)
# Load both splits: is_train=True for the training set, False for the test set.
train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)

In [15]:
def encode_labels(labels, num_classes=10):
    """One-hot encode integer class labels.

    Args:
        labels: Sequence or 1-D array of integer class indices.
        num_classes: Number of columns in each encoded row. Defaults to 10,
            the value this function previously hard-coded.

    Returns:
        Float array of shape ``(len(labels), num_classes)`` that is zero
        everywhere except for a 1 at ``[i, labels[i]]`` in every row.

    Raises:
        IndexError: If a label is out of bounds for ``num_classes`` columns.
    """
    # Explicit integer dtype so an empty input (which NumPy would otherwise
    # infer as float) is still a valid index array.
    indices = np.asarray(labels, dtype=np.intp)
    encoded_labels = np.zeros((len(indices), num_classes))
    # One vectorized assignment instead of a Python-level loop per sample.
    encoded_labels[np.arange(len(indices)), indices] = 1
    return encoded_labels

# Replace the integer label arrays of both splits with their one-hot encodings.
train_Y = encode_labels(train_Y)
test_Y = encode_labels(test_Y)

In [16]:
def normalize_data(data):
    """Return ``data`` divided by 255.

    Args:
        data: Array of values to rescale (values in 0..255 map to 0..1).

    Returns:
        The result of ``data / 255``.
    """
    max_pixel_value = 255
    return data / max_pixel_value

# Rescale the image arrays of both splits by dividing by 255.
train_X = normalize_data(train_X)
test_X = normalize_data(test_X)

In [17]:
# Spot-check the preprocessing: one normalized training sample, then array shapes.
print(train_X[2])
print(train_Y.shape)

print(test_X.shape)
print(test_Y.shape)


[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

### Training process

In [18]:
def split_into_batches(data, labels, batch_size):
    """Shuffle samples and labels together and cut them into mini-batches.

    Args:
        data: Array of samples, indexed along axis 0.
        labels: Array of labels aligned with ``data`` along axis 0.
        batch_size: Number of samples per batch; the last batch holds the
            remainder and may be smaller.

    Returns:
        List of ``(data_batch, label_batch)`` tuples in shuffled order.
    """
    sample_count = data.shape[0]
    # A single permutation indexes both arrays so each sample keeps its label.
    order = np.random.permutation(sample_count)
    shuffled_data, shuffled_labels = data[order], labels[order]
    return [
        (
            shuffled_data[start:start + batch_size],
            shuffled_labels[start:start + batch_size],
        )
        for start in range(0, sample_count, batch_size)
    ]

In [19]:
def softmax(z):
    """Apply a numerically stable softmax along axis 1.

    Args:
        z: 2-D array of scores, one row per sample.

    Returns:
        Array of the same shape in which every row sums to 1.
    """
    # Subtracting each row's maximum leaves the result unchanged but keeps
    # exp() from overflowing on large scores.
    shifted = z - np.max(z, axis=1, keepdims=True)
    numerators = np.exp(shifted)
    denominators = np.sum(numerators, axis=1, keepdims=True)
    return numerators / denominators


In [20]:
def forward_propagation(X, W, b):
    """Compute class probabilities for a batch with a single linear layer.

    Args:
        X: Batch of input samples.
        W: Weight matrix multiplied with ``X`` via ``np.dot``.
        b: Bias added to the product.

    Returns:
        ``softmax(np.dot(X, W) + b)``.
    """
    logits = np.dot(X, W)
    logits = logits + b
    return softmax(logits)

In [21]:
def cross_entropy_loss(true_label, predicetd_labels):
    """Return the cross-entropy loss averaged over the rows of a batch.

    Args:
        true_label: Array of target distributions, one row per sample.
        predicetd_labels: Array of predicted probabilities with the same
            shape as ``true_label``. The misspelled parameter name is kept
            so existing keyword callers keep working.

    Returns:
        ``-sum(true_label * log(predicetd_labels + 1e-9))`` divided by the
        number of rows in ``true_label``.
    """
    # Use the function's own parameters; reading unrelated outer names here
    # would raise NameError or silently score the wrong arrays.
    epsilon = 1e-9  # keeps log() finite when a predicted probability is 0
    log_probabilities = np.log(predicetd_labels + epsilon)
    return -np.sum(true_label * log_probabilities) / true_label.shape[0]

In [22]:
def gradient_descent(X, y_true, y_pred, W, b, learning_rate):
    """Apply one gradient-descent step to the weights and biases.

    Args:
        X: Batch of input samples, one row per sample.
        y_true: Target rows for the batch.
        y_pred: Predicted rows for the batch, same shape as ``y_true``.
        W: Weight array; updated in place.
        b: Bias array; updated in place.
        learning_rate: Step size applied to both gradients.

    Returns:
        The same ``W`` and ``b`` objects after the in-place update.
    """
    batch_size = X.shape[0]
    error = y_pred - y_true

    # Gradients averaged over the batch.
    weight_grad = np.dot(X.T, error) / batch_size
    bias_grad = np.sum(error, axis=0) / batch_size

    # In-place step: the caller's arrays are modified as well as returned.
    W -= learning_rate * weight_grad
    b -= learning_rate * bias_grad
    return W, b

In [23]:
def train_batch(batch,batch_labels,weights,bias,learning_rate):
    """Run one forward pass and one parameter update on a single mini-batch.

    Args:
        batch: Input samples of the mini-batch.
        batch_labels: Target rows aligned with ``batch``.
        weights: Current weight array.
        bias: Current bias array.
        learning_rate: Step size forwarded to ``gradient_descent``.

    Returns:
        The ``(weights, bias)`` pair returned by ``gradient_descent``.
    """
    probabilities = forward_propagation(batch, weights, bias)
    return gradient_descent(
        batch, batch_labels, probabilities, weights, bias, learning_rate
    )


In [24]:
def train(training_data,labels,learning_rate,epochs,batch_size):
    """Fit the weights and biases with mini-batch gradient descent.

    Weights start as uniform random values from ``np.random.rand`` with 10
    output columns; biases start at zero. The data is reshuffled into
    batches at the start of every epoch.

    Args:
        training_data: 2-D array of samples, one row per sample.
        labels: Target rows aligned with ``training_data``.
        learning_rate: Step size for each update.
        epochs: Number of full passes over the data.
        batch_size: Number of samples per mini-batch.

    Returns:
        The trained ``(weights, biases)`` pair.
    """
    feature_count = training_data.shape[1]
    weights = np.random.rand(feature_count, 10)
    biases = np.zeros(10)

    for _epoch in range(epochs):
        for samples, targets in split_into_batches(training_data, labels, batch_size):
            weights, biases = train_batch(
                samples, targets, weights, biases, learning_rate
            )
    return weights, biases

In [25]:
# Train on the full training split: learning_rate=0.01, epochs=100, batch_size=100.
weights,biases = train(train_X,train_Y,0.01,100,100)



In [30]:
# Save the trained parameters as .npy files (relative paths).
np.save('weights2.npy', weights)
np.save('biases2.npy', biases)

### Testing process

In [26]:
def test(data,labels,weights,biases):
    """Measure classification accuracy of the model on a labelled data set.

    Args:
        data: Input samples, one row per sample.
        labels: Target rows; the index of each row's maximum is taken as
            the true class.
        weights: Weight array passed to ``forward_propagation``.
        biases: Bias array passed to ``forward_propagation``.

    Returns:
        Number of rows whose predicted class equals the true class, divided
        by ``data.shape[0]``.
    """
    probabilities = forward_propagation(data, weights, biases)
    predicted_classes = np.argmax(probabilities, axis=1)
    true_classes = np.argmax(labels, axis=1)
    hits = predicted_classes == true_classes
    return np.sum(hits) / data.shape[0]
   

In [29]:
# Evaluate the trained parameters on the test split; the cell displays the accuracy.
test(test_X,test_Y,weights,biases)



np.float64(0.9158)