This is the implementation of a simple FashionMNIST classifer model using PyTorch and not d2l

In [12]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [14]:
train_data = datasets.FashionMNIST(root='data', train=True, download=True, transform= ToTensor(),)
test_data = datasets.FashionMNIST(root='data',train=False, download=False, transform= ToTensor(),)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100.0%


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100.0%

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw






In [29]:
class MLPScratch(nn.Module):
    def __init__(self,num_inputs, num_outputs, num_hiddens, lr, sigma=0.01):
        super().__init__()
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.num_hiddens = num_hiddens
        

        self.W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens) *sigma)
        self.b1 = nn.Parameter(torch.zeros(num_hiddens))

        self.W2 = nn.Parameter(torch.randn(num_hiddens,num_outputs)*sigma)
        self.b2 = nn.Parameter(torch.zeros(num_outputs))
        
    def relu(self,X):
        a = torch.zeros_like(X)
        return torch.max(X,a)
        
    def forward(self, X):
        X = X.reshape((-1,self.num_inputs))
        H = self.relu(torch.matmul(X,self.W1) + self.b1)
        return torch.matmul(H,self.W2) + self.b2
        
    def loss_function(self, y_hat, y):
        log_softmax = y_hat - torch.logsumexp(y_hat, dim=1, keepdim=True)
        batch_indices = torch.arange(y_hat.size(0))
        correct_log_probs = log_softmax[batch_indices, y]
        return -correct_log_probs.mean()


In [44]:
batch_size = 10,000
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size)

device = 'cpu'
model = MLPScratch(784, 10, 256,.01).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

epochs = 70
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for X, y in train_loader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        y_hat = model(X)
        loss = model.loss_function(y_hat, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for X, y in test_loader:
        X, y = X.to(device), y.to(device)
        logits = model(X)
        predictions = torch.argmax(logits, dim=1)
        correct += (predictions == y).sum().item()
        total += y.size(0)
print(f"Test Accuracy: {correct / total:.4f}")

Epoch 1, Loss: 1.0367
Epoch 2, Loss: 0.6028
Epoch 3, Loss: 0.5224
Epoch 4, Loss: 0.4796
Epoch 5, Loss: 0.4549
Epoch 6, Loss: 0.4364
Epoch 7, Loss: 0.4166
Epoch 8, Loss: 0.4064
Epoch 9, Loss: 0.3931
Epoch 10, Loss: 0.3827
Epoch 11, Loss: 0.3732
Epoch 12, Loss: 0.3683
Epoch 13, Loss: 0.3613
Epoch 14, Loss: 0.3538
Epoch 15, Loss: 0.3462
Epoch 16, Loss: 0.3410
Epoch 17, Loss: 0.3378
Epoch 18, Loss: 0.3291
Epoch 19, Loss: 0.3267
Epoch 20, Loss: 0.3219
Epoch 21, Loss: 0.3175
Epoch 22, Loss: 0.3146
Epoch 23, Loss: 0.3100
Epoch 24, Loss: 0.3057
Epoch 25, Loss: 0.3013
Epoch 26, Loss: 0.2971
Epoch 27, Loss: 0.2950
Epoch 28, Loss: 0.2923
Epoch 29, Loss: 0.2875
Epoch 30, Loss: 0.2847
Epoch 31, Loss: 0.2815
Epoch 32, Loss: 0.2795
Epoch 33, Loss: 0.2744
Epoch 34, Loss: 0.2742
Epoch 35, Loss: 0.2698
Epoch 36, Loss: 0.2667
Epoch 37, Loss: 0.2651
Epoch 38, Loss: 0.2634
Epoch 39, Loss: 0.2610
Epoch 40, Loss: 0.2566
Epoch 41, Loss: 0.2554
Epoch 42, Loss: 0.2533
Epoch 43, Loss: 0.2500
Epoch 44, Loss: 0.24

In [35]:
def predict(model, image):
    model.eval()
    with torch.no_grad():
        image = image.to(device).unsqueeze(0)
        output = model(image)
        return torch.softmax(output, dim=1).item()
    

SyntaxError: unexpected EOF while parsing (936112332.py, line 1)