In [160]:
from sklearn.datasets import fetch_openml
import numpy as np
import torch
import math
from torch import nn

x, y = fetch_openml("mnist_784", version = 1, return_X_y = True)
y = np.array([int(v) for v in y])[:,np.newaxis]

In [161]:
x = torch.tensor(x, dtype = torch.float)
y = torch.tensor(y, dtype = torch.long)

In [162]:
print(x.shape)
print(y.shape)

torch.Size([70000, 784])
torch.Size([70000, 1])


In [163]:
# Set the train and validation set
x_train = x[0:50000]
y_train = y[0:50000].squeeze()
x_validation = x[50000:]
y_validation = y[50000:]

In [164]:
class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(784,10)/math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))
    def forward(self, xb):
        return xb @ self.weights + self.bias
    
def accuracy(out, yb):
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()

model = Mnist_Logistic()
print(model.weights.dtype)

torch.float32


In [165]:
bs = 64 # batch size
xb = x_train[0:bs] # a mini-batch from x
yb = y_train[0:bs] # a mini-batch from y
print(xb.size())
print(yb)

torch.Size([64, 784])
tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9, 4, 0, 9, 1,
        1, 2, 4, 3, 2, 7, 3, 8, 6, 9, 0, 5, 6, 0, 7, 6, 1, 8, 7, 9, 3, 9, 8, 5,
        9, 3, 3, 0, 7, 4, 9, 8, 0, 9, 4, 1, 4, 4, 6, 0])


In [166]:
import torch.nn.functional as F
loss_func = F.cross_entropy

In [167]:
print(accuracy(model(xb), yb))

tensor(0.1094)


In [168]:
print(model.weights.dtype, yb.dtype)

torch.float32 torch.int64


In [169]:
print(loss_func(model(xb), yb))

tensor(96.5129, grad_fn=<NllLossBackward>)


In [170]:
lr = 0.5
epochs = 2
n,c = x_train.shape
def fit():
    for epoch in range(epochs):
        for i in range((n - 1) // bs + 1):
            start_i = i * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            pred = model(xb)
            loss = loss_func(pred, yb)

            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                model.zero_grad()

fit()

In [177]:
print(accuracy(model(xb), yb))

tensor(0.9062)


In [179]:
print(loss_func(model(xb), yb))

tensor(3312.4968, grad_fn=<NllLossBackward>)
