In [88]:
from pathlib import Path
import requests
import pickle
import gzip
import matplotlib.pyplot as plt
import numpy as np
import torch
import math
from IPython.core.debugger import set_trace

In [89]:
# Reading data: download the pickled MNIST archive once and load the train/validation splits.
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/main/_static/"
FILENAME = "mnist.pkl.gz"

# Only hit the network when the archive is not already cached on disk.
if not (PATH / FILENAME).exists():
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(URL + FILENAME, timeout=60)
    # Fail loudly on an HTTP error instead of caching an error page as the dataset,
    # which would make every later run fail while un-gzipping it.
    response.raise_for_status()
    # write_bytes opens, writes and closes the file, so no handle is left open.
    (PATH / FILENAME).write_bytes(response.content)

# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load archives obtained from a source you trust.
with gzip.open(PATH / FILENAME, "rb") as f:
    # The archive holds three (inputs, targets) pairs; the third one is discarded here.
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")

In [90]:
# Convert the four loaded arrays into torch tensors, keeping the same names.
x_train, y_train, x_valid, y_valid = (
    torch.tensor(array) for array in (x_train, y_train, x_valid, y_valid)
)
# Size of the first dimension of x_train; the training loop uses it as the row count.
n = x_train.shape[0]

In [91]:
# Weight matrix of shape (784, 10): standard-normal draws scaled by 1 / sqrt(784).
# requires_grad_() is applied in place so autograd tracks it for backpropagation.
weights = (torch.randn(784, 10) / math.sqrt(784)).requires_grad_()
# Bias vector of 10 zeros, also tracked by autograd for backpropagation.
bias = torch.zeros(10, requires_grad=True)

In [92]:
def log_softmax(x):#Using log of the softmax function as our activation function
    return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    """Compute ``xb @ weights + bias`` and pass the result through ``log_softmax``.

    Reads the module-level ``weights`` and ``bias`` tensors.
    """
    logits = xb @ weights + bias
    return log_softmax(logits)

# Mini-batch size used for training.
bs = 64
# First mini-batch of inputs: the leading ``bs`` rows of x_train.
xb = x_train[:bs]
# Model output on that batch, computed before any training has happened.
preds = model(xb)


In [93]:
def negloglike(input, target):
    """Return the mean negative log-likelihood of the target entries.

    For row ``i`` the value ``input[i, target[i]]`` is picked; the result is the
    negated mean of those picked values.

    Args:
        input: 2-D tensor of per-row log-likelihood values.
        target: 1-D tensor of column indices, one per row of ``input``.

    Returns:
        A scalar tensor.
    """
    row_index = range(target.shape[0])
    picked = input[row_index, target]
    return -picked.mean()

# Use the negative log-likelihood as the loss for the rest of the notebook.
loss_function = negloglike

# Targets for the first mini-batch: the leading ``bs`` entries of y_train.
yb = y_train[:bs]

# Loss of the untrained model on the first mini-batch.
print(loss_function(preds, yb))

tensor(2.2896, grad_fn=<NegBackward0>)


In [94]:
def accuracy(out, yb):
    """Return the fraction of rows whose arg-max along dim 1 equals the target.

    Args:
        out: 2-D tensor of per-class scores, one row per example.
        yb: 1-D tensor of target class indices.

    Returns:
        A scalar float tensor in [0, 1].
    """
    predicted = out.argmax(dim=1)
    hits = predicted == yb
    return hits.float().mean()

# Accuracy of the untrained model on the first mini-batch.
initial_accuracy = accuracy(preds, yb)
print(initial_accuracy)

tensor(0.1562)


In [95]:
lr = 0.5  # learning rate
epochs = 20  # number of full passes over the training data
# Number of mini-batches per epoch, i.e. ceil(n / bs); the last batch may hold fewer than bs rows.
n_batches = (n - 1) // bs + 1

for epoch in range(epochs):
    for i in range(n_batches):
        start = i * bs
        xbatch = x_train[start:start + bs]  # inputs of the current mini-batch
        ybatch = y_train[start:start + bs]  # matching targets
        prediction = model(xbatch)  # forward pass
        loss = loss_function(prediction, ybatch)

        # Backpropagation: populates weights.grad and bias.grad.
        loss.backward()
        # The parameter update itself must not be recorded by autograd.
        with torch.no_grad():
            # Plain gradient-descent step.
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            # backward() accumulates into .grad, so reset it before the next batch.
            weights.grad.zero_()
            bias.grad.zero_()



In [96]:
# Run the model over the validation set once and reuse the output for both
# metrics, instead of repeating the forward pass for each of them.
valid_preds = model(x_valid)
print(loss_function(valid_preds, y_valid), accuracy(valid_preds, y_valid))

tensor(0.2818, grad_fn=<NegBackward0>) tensor(0.9249)
