In [173]:
import numpy as np
from tqdm import trange
import matplotlib.pyplot as plot
import torchvision.datasets as datastore
import time

# Fetch both MNIST splits into the local data directory (downloaded only if missing).
# The last line is left as a bare expression so the notebook shows the test-split summary.
mnistRoot = './data'
datastore.MNIST(root=mnistRoot, train=True, download=True)
datastore.MNIST(root=mnistRoot, train=False, download=True)

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test

In [174]:
# Read a binary file into a writable 1-D uint8 NumPy array
def getData(file):
    """Read all of `file` and return its bytes as a NumPy array.

    Parameters
    ----------
    file : str or path-like
        Path of the file, opened in binary read mode.

    Returns
    -------
    numpy.ndarray
        One-dimensional ``uint8`` array with one element per byte of the file.
        The array is a copy, so it is writable and independent of the buffer
        that was read from disk.

    Raises
    ------
    OSError
        Propagated from ``open`` if the file cannot be opened or read.
    """
    # The context manager closes the handle even if read() raises,
    # instead of leaving it to the garbage collector.
    with open(file, 'rb') as handle:
        rawBytes = handle.read()
    # frombuffer yields a read-only view over the bytes object; copy() makes it writable.
    return np.frombuffer(rawBytes, dtype=np.uint8).copy()

In [175]:
# Read the raw MNIST files downloaded above and turn them into NumPy arrays.
# Image files: skip the first 16 (0x10) bytes of metadata, then view the rest as
# one row of 784 pixel values per image, i.e. [total images x 784].
trainImageBytes = getData('./data/MNIST/raw/train-images-idx3-ubyte')
testImageBytes = getData('./data/MNIST/raw/t10k-images-idx3-ubyte')
imageTraining = trainImageBytes[16:].reshape(-1, 784)
imageTesting = testImageBytes[16:].reshape(-1, 784)

# Label files: skip the first 8 bytes of metadata; what remains is one label byte per image.
trainLabelBytes = getData('./data/MNIST/raw/train-labels-idx1-ubyte')
testLabelBytes = getData('./data/MNIST/raw/t10k-labels-idx1-ubyte')
labelsTraining = trainLabelBytes[8:]
labelsTesting = testLabelBytes[8:]

# Sanity check: the image and label counts must line up for each split.
for splitImages, splitLabels in ((imageTraining, labelsTraining), (imageTesting, labelsTesting)):
    print(splitImages.shape, splitLabels.shape)

(60000, 784) (60000,)
(10000, 784) (10000,)


In [176]:
# One-hot encode the labels for both splits.
# Row d of the 10x10 identity matrix is the one-hot vector for digit d, so indexing
# the identity with the label vector yields one float32 row of length 10 per sample.
identity10 = np.eye(10, dtype=np.float32)

# [number of training images x 10]
onehotTraining = identity10[labelsTraining]

# [number of test images x 10]
onehotTesting = identity10[labelsTesting]

print(onehotTraining.shape, onehotTesting.shape)

(60000, 10) (10000, 10)


In [177]:
# Training hyper-parameters
batchSize, numOfEpochs = 1000, 400
learningRate = 7.5e-8

# Weight matrix to be learned: one row per pixel (784), one column per class (10),
# starting from all zeroes.
Weights = np.zeros(shape=(784, 10))

print(Weights)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [178]:
# Method to compute the gradient and take one gradient-descent step on the weight matrix
def forward(images, labels, Weights, lr=None):
    """Return the weights after one gradient-descent step on the given batch.

    The step direction is ``images.T @ (images @ Weights - labels)`` averaged
    over the rows of ``images``.

    Parameters
    ----------
    images : numpy.ndarray
        Batch of inputs, one sample per row.
    labels : numpy.ndarray
        Targets for the batch, one row per sample, same column count as ``Weights``.
    Weights : numpy.ndarray
        Current weight matrix. It is not modified in place.
    lr : float, optional
        Step size. When omitted, the module-level ``learningRate`` is used,
        which preserves the behaviour of existing three-argument calls.

    Returns
    -------
    numpy.ndarray
        New weight matrix ``Weights - lr * gradient``.
    """
    if lr is None:
        lr = learningRate

    # Normalise by the number of samples actually supplied rather than the global
    # batchSize, so the gradient stays a true per-sample average for any batch size.
    sampleCount = images.shape[0]
    residual = images.dot(Weights) - labels
    gradient = images.T.dot(residual) / sampleCount
    return Weights - (lr * gradient)

In [179]:
# Per-iteration history collected during training, used later for the graphs:
# batch quadratic loss, test-set accuracy, and the iteration index.
quadLosses, testAccuracies, epochs = [], [], []

In [None]:
# Wall-clock timer around the whole training run (read back as endCounter below)
startCounter = time.time()

#for section 5
# newSample = np.random.choice(60000,size=500,replace=False) 
# sample = imageTraining[newSample]

# Mini-batch stochastic gradient descent.
# Each pass of this loop draws one random batch and performs one weight update,
# so an "epoch" here is a single update step.
numTrainingImages = imageTraining.shape[0]
for i in range(numOfEpochs):
    epochs.append(i)      # To plot graphs

    # Select batchSize indices uniformly at random, with replacement.
    # randint's lower bound is inclusive and its upper bound exclusive, so the range
    # must start at 0; starting at 1 would make training image 0 unreachable.
    batch = np.random.randint(0, numTrainingImages, size=(batchSize))
    # for section 5
    # batch = np.random.randint(0, 500, size=(batchSize))
    # images = sample[batch]
    # NOTE(review): in the section 5 variant, `batch` indexes into `sample`, so the
    # labels presumably need to come from the same subsample - confirm before enabling.

    # select the images corresponding to them from training dataset
    images = imageTraining[batch]
    # select the corresponding one-hot labels from training labels
    labels = onehotTraining[batch]

    # Call the forward method to update the weights
    Weights = forward(images, labels, Weights)

    # Quadratic loss of the updated weights on the same batch, averaged per sample
    predictedLabels = images.dot(Weights)
    quadraticLoss = np.sum((labels - predictedLabels)**2) / batchSize
    quadLosses.append(quadraticLoss)      # To plot graphs

    # Accuracy on the full training set: predicted class is the highest-scoring column
    predictedTrainingLabels = imageTraining.dot(Weights)
    trainingAccu = np.sum(labelsTraining == np.argmax(predictedTrainingLabels, axis=1)) / labelsTraining.shape[0]

    # Accuracy on the full test set, computed the same way
    predictedTestingLabels = imageTesting.dot(Weights)
    testingAccu = np.sum(labelsTesting == np.argmax(predictedTestingLabels, axis=1)) / labelsTesting.shape[0]

    # To plot graphs
    testAccuracies.append(testingAccu)

# Elapsed seconds for the loop above (includes the per-iteration accuracy evaluations)
endCounter = time.time() - startCounter
print('loss:' , quadraticLoss)
print('accuracy:' , testingAccu)

In [None]:
# Test accuracy recorded after every training iteration
accuracyFig, accuracyAx = plot.subplots()
accuracyAx.plot(epochs, testAccuracies)
accuracyAx.set_xlabel('Epochs')
accuracyAx.set_ylabel('Test Accuracy')
accuracyAx.set_title('Test accuracy vs num of Epochs')
plot.show()

In [None]:
# Batch quadratic loss recorded after every training iteration
lossFig, lossAx = plot.subplots()
lossAx.plot(epochs, quadLosses)
lossAx.set_xlabel('Epochs')
lossAx.set_ylabel('Quadratic Loss')
lossAx.set_title('Loss vs num of Epochs')
plot.show()

In [None]:
# Report the training time: endCounter holds the elapsed wall-clock seconds
# (time.time() difference) measured around the training loop cell.
print(endCounter)