In [22]:
!pip install mnist



In [23]:
#importing the models
import torch
import numpy as np
import mnist
from sklearn.model_selection import train_test_split

In [24]:
#import and create the dataset or preprocess it
train_images = mnist.train_images()
train_labels = mnist.train_labels()
test_images = mnist.test_images()
test_labels = mnist.test_labels()

#As the model used is ANN which take 784 input variables but our test dataset is 2d therefore convert it into single dimension matrix
train_images = train_images.reshape(-1,28*28)
train_images = train_images/255.0 # as the range is from 0 to 255 so changed it to 0 to 1
#similarly for test_images
test_images = test_images.reshape(-1,28*28)/255.0

#Now the labels
train_labels = np.eye(10)[train_labels]
test_labels = np.eye(10)[test_labels]

# train_images = torch.tensor(train_images)
# test_images = torch.tensor(test_images)
# train_labels = torch.tensor(train_labels)
# test_labels = torch.tensor(test_labels)

print(train_labels.shape,test_labels.shape)

(60000, 10) (10000, 10)


In [25]:
# train_images = train_images.to("cuda")

In [26]:
#Helping functions
def softmax(x):
  exp_x = np.exp(x-np.max(x,axis=-1,keepdims=True))
  return exp_x/np.sum(exp_x,axis=-1,keepdims=True)

def sigmoid(x):
  return (1 / (1 + np.exp(-x)))

In [27]:
input_size = 784
hidden_size = 128
output_size = 10
learning_rate = 0.1

np.random.seed(0)
weights_input_hidden = np.random.randn(input_size, hidden_size)
biases_hidden = np.zeros(hidden_size)
weights_hidden_output = np.random.randn(hidden_size, output_size)
biases_output = np.zeros(output_size)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    for i in range(len(train_images)):
        # Forward pass
        input_layer = train_images[i]
        hidden_layer_input = np.dot(input_layer, weights_input_hidden) + biases_hidden
        hidden_layer_output = sigmoid(hidden_layer_input)  # Sigmoid activation
        output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + biases_output
        output_layer_output = softmax(output_layer_input)

        # Calculate loss (cross-entropy)
        loss = -np.sum(train_labels[i] * np.log(output_layer_output))

        # Backpropagation
        output_error = output_layer_output - train_labels[i]
        hidden_error = np.dot(output_error, weights_hidden_output.T)
        hidden_delta = hidden_error * hidden_layer_output * (1 - hidden_layer_output)
        weights_hidden_output -= learning_rate * np.outer(hidden_layer_output, output_error)
        biases_output -= learning_rate * output_error
        weights_input_hidden -= learning_rate * np.outer(input_layer, hidden_delta)
        biases_hidden -= learning_rate * hidden_delta

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss}")


Epoch 1/10, Loss: 0.018713784618766
Epoch 2/10, Loss: 0.002085390365429011
Epoch 3/10, Loss: 0.013104398914626673
Epoch 4/10, Loss: 0.002630742030520313
Epoch 5/10, Loss: 0.0020550338996251595
Epoch 6/10, Loss: 7.15106892419132e-05
Epoch 7/10, Loss: 0.0008135580531129098
Epoch 8/10, Loss: 7.729843730715457e-06
Epoch 9/10, Loss: 2.8698955353250017e-06
Epoch 10/10, Loss: 0.0005724136490047826


In [28]:
# Testing
correct = 0
for i in range(len(test_images)):
    input_layer = test_images[i]
    hidden_layer_input = np.dot(input_layer, weights_input_hidden) + biases_hidden
    hidden_layer_output = 1 / (1 + np.exp(-hidden_layer_input))  # Sigmoid activation
    output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + biases_output
    output_layer_output = softmax(output_layer_input)
    prediction = np.argmax(output_layer_output)
    if prediction == np.argmax(test_labels[i]):
        correct += 1

accuracy = correct / len(test_images) * 100
print(f"Test Accuracy: {accuracy:.2f}%")

Test Accuracy: 95.82%
