In [7]:
from matplotlib import pyplot
import math
import numpy as np
import torch
import torchvision.datasets as datasets
import torch.nn.functional as F

##### Function definition

In [8]:
# Training loss: an alias for F.cross_entropy. In this notebook it is called
# with the raw scores returned by model() (no softmax is applied beforehand)
# and the integer class labels.
loss_func = F.cross_entropy

def model(x):
    """Return the linear class scores for a batch of inputs.

    Multiplies ``x`` by the module-level ``weights`` and adds the module-level
    ``bias``. Both are read from global scope at call time, so they must be
    defined before the first call. No softmax is applied here.
    """
    logits = x @ weights
    return logits + bias

def accuracy(Y_hat, Y):
    """Fraction of rows whose highest-scoring column equals the label.

    Y_hat: per-class scores, argmax is taken along dim 1.
    Y:     labels compared element-wise against the argmax indices.
    Returns a 0-dim float tensor (mean of the 0/1 match indicators).
    """
    predicted_classes = Y_hat.argmax(dim=1)
    is_correct = predicted_classes.eq(Y)
    return is_correct.float().mean()




##### Loading dataset

In [9]:
# Fetch both MNIST splits (download=True, stored under ../data); no transform is applied
mnist_trainset = datasets.MNIST(root='../data', train=True, download=True, transform=None)
mnist_testset = datasets.MNIST(root='../data', train=False, download=True, transform=None)

# Wrap both splits in DataLoaders. The lines below only reach through
# `.dataset`, so batch_size / shuffle do not affect the tensors extracted here.
train_loader = torch.utils.data.DataLoader(mnist_trainset, batch_size=1, shuffle=True)
test_loader = torch.utils.data.DataLoader(mnist_testset, batch_size=10000, shuffle=False)

# Training split: full image data and labels
X_train, Y_train = train_loader.dataset.data, train_loader.dataset.targets

# Test split: full image data and labels
X_test, Y_test = test_loader.dataset.data, test_loader.dataset.targets

##### Preprocessing

In [90]:
# Scale pixel values to [0, 1] and convert to float32.
# The divisor is taken from the training set only and reused for the test set,
# so no test-set statistic enters preprocessing and both splits are guaranteed
# to share exactly the same scaling.
pixel_max = X_train.max().to(dtype=torch.float32)
X_train = X_train.to(dtype=torch.float32) / pixel_max
X_test = X_test.to(dtype=torch.float32) / pixel_max

# Flatten every example to one feature vector: (N, ...) -> (N, features)
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

print("Train examples : ",X_train.shape[0])
print("Test examples : ",X_test.shape[0])
print("Nb of features : ",X_train.shape[1])

Train examples :  60000
Test examples :  10000
Nb of features :  784


##### Initialization

In [91]:
# Model dimensions and initialization seed, named instead of repeated literals.
N_FEATURES = 784  # number of input features (matches the flattened feature count printed above)
N_CLASSES = 10    # number of output scores produced by model()
INIT_SEED = 0

# A dedicated, seeded generator makes the starting weights identical on every
# run (so training results are reproducible) without touching the global
# torch RNG state.
init_rng = torch.Generator().manual_seed(INIT_SEED)

# Standard-normal draws scaled by 1/sqrt(N_FEATURES) keep the initial scores small.
weights = torch.randn(N_FEATURES, N_CLASSES, generator=init_rng) / math.sqrt(N_FEATURES)
weights.requires_grad_()  # enable gradient tracking in place, after the scaling
bias = torch.zeros(N_CLASSES, requires_grad=True)

##### Gradient descent

In [92]:
lr_init = 0.5  # initial learning rate
decay = 1  # learning-rate decay factor, see the lr schedule below
epochs = 100  # how many epochs to train for
n = X_train.shape[0]  # number of training examples
batch_size = 1000


for epoch in range(epochs):
    # Inverse-time decay: the step size shrinks as 1 / (1 + decay * epoch)
    lr = lr_init / (1 + decay * epoch)

    # Step through the data by batch_size. Unlike range(n // batch_size), this
    # still visits a final, shorter batch when n is not a multiple of batch_size.
    for start in range(0, n, batch_size):
        X_batch = X_train[start:start + batch_size]
        Y_batch = Y_train[start:start + batch_size]

        # Forward pass; kept under its own name so the inputs are not overwritten
        logits = model(X_batch)
        loss = loss_func(logits, Y_batch)

        loss.backward()

        # Manual gradient-descent step. The updates run under no_grad so they
        # are not recorded by autograd; gradients are then zeroed because
        # backward() accumulates into .grad.
        with torch.no_grad():
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            weights.grad.zero_()
            bias.grad.zero_()

    if (epoch + 1) % 10 == 0:
        # Reports the loss of the LAST mini-batch of this epoch, not an epoch average
        print(epoch + 1 , "\t", loss.item())


10 	 0.24010531604290009
20 	 0.22596538066864014
30 	 0.218764528632164
40 	 0.2140415608882904
50 	 0.21055547893047333
60 	 0.20781202614307404
70 	 0.20556947588920593
80 	 0.203690767288208
90 	 0.20208892226219177
100 	 0.20070534944534302


##### Predictions

In [93]:
# Evaluation only: run the forward pass under no_grad so autograd does not
# record a graph for the whole test set (weights/bias have requires_grad=True).
with torch.no_grad():
    preds = model(X_test)
    test_accuracy = accuracy(preds, Y_test).item()

print("Accuracy on test set : ", round(test_accuracy, 3))

Accuracy on test set :  0.926
