In [2]:
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch import nn
import torch
from torch.optim import SGD

In [80]:
training_data = MNIST(
    root="../data",
    train=True,
    download=True,
    transform=ToTensor()
)

dataloader = DataLoader(training_data, batch_size=4, shuffle=True,num_workers=2)


In [87]:
class TinyModel(nn.Module):

    def __init__(self):
        super(TinyModel, self).__init__()
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(784, 200)
        self.activation = nn.ReLU()
        self.linear2 = nn.Linear(200, 10)
        self.softmax = nn.Softmax(1)

    def forward(self, x):
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.activation(x)
        x = self.linear2(x)
        x = self.softmax(x)
        return x

tinymodel = TinyModel()


# optim = Adam(tinymodel.parameters(),lr=1e-9)
optim = SGD(tinymodel.parameters(), lr=1e-2, momentum=0.9)

# Initialize the loss function
loss_fn = nn.CrossEntropyLoss()
        

In [88]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
            
            
def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [83]:
train_loop(dataloader, tinymodel, loss_fn, optim)

loss: 2.309517  [    4/60000]
loss: 2.281843  [  404/60000]
loss: 2.167447  [  804/60000]
loss: 1.947353  [ 1204/60000]
loss: 1.505543  [ 1604/60000]
loss: 1.688819  [ 2004/60000]
loss: 1.941329  [ 2404/60000]
loss: 1.731002  [ 2804/60000]
loss: 2.353802  [ 3204/60000]
loss: 2.022069  [ 3604/60000]
loss: 1.895247  [ 4004/60000]
loss: 1.530955  [ 4404/60000]
loss: 1.462959  [ 4804/60000]
loss: 1.717743  [ 5204/60000]
loss: 1.680048  [ 5604/60000]
loss: 1.645811  [ 6004/60000]
loss: 1.551320  [ 6404/60000]
loss: 1.476107  [ 6804/60000]
loss: 1.480225  [ 7204/60000]
loss: 1.795961  [ 7604/60000]
loss: 1.712790  [ 8004/60000]
loss: 1.496665  [ 8404/60000]
loss: 1.948172  [ 8804/60000]
loss: 1.506967  [ 9204/60000]
loss: 1.684376  [ 9604/60000]
loss: 1.464545  [10004/60000]
loss: 1.462043  [10404/60000]
loss: 1.473217  [10804/60000]
loss: 1.508594  [11204/60000]
loss: 1.472673  [11604/60000]
loss: 1.466530  [12004/60000]
loss: 1.471798  [12404/60000]
loss: 1.497318  [12804/60000]
loss: 1.55

In [84]:
test_loop(dataloader, tinymodel, loss_fn)

Test Error: 
 Accuracy: 94.0%, Avg loss: 1.524532 



In [86]:
for params in tinymodel.parameters():
    params.shape

In [3]:
test = torch.rand(1, 32, 32, 1)

In [5]:
test.shape

torch.Size([1, 32, 32, 1])