In [1]:
import torch
import torchvision

In [2]:
print("torch:", torch.__version__)

torch: 2.3.0


In [3]:
from torchvision.datasets import MNIST

In [4]:
from torchvision.transforms import Lambda, ToTensor, Compose
trasform = Compose([
    ToTensor(),
    Lambda(lambda image: image/255),
    Lambda(lambda image: image.view(784))
])

data_train = MNIST(root="./", download=True, train=True, transform=trasform)
data_test = MNIST(root="./", download=True, train=False, transform=trasform)

In [5]:
data_train[0][0].shape

torch.Size([784])

In [6]:
from torch import nn, optim

class Model(nn.Module):
    def __init__(self, sizes):      #[128, 64, 10]
        super().__init__()
        self.hidden_layer_1 = nn.Linear(784,sizes[0])
        self.act1= nn.Sigmoid()
        self.hidden_layer_2 = nn.Linear(sizes[0], sizes[1])
        self.act2 = nn.Sigmoid()
        self.output_layer = nn.Linear(sizes[1], sizes[2])
        self.act3 = nn.Softmax(dim=1)

        self.loss = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.parameters())

    
    def forward(self, inputs):
        x = self.hidden_layer_1(inputs)
        x = self.act1(x)
        x = self.hidden_layer_2(x)
        x = self.act2(x)
        x = self.output_layer(x)
        x = self.act3(x)
        return x

    def fit(self, X, Y):
        self.optimizer.zero_grad()
        y_pred = self.forward(X)
        loss = self.loss(y_pred, Y)
        loss.backward()
        self.optimizer.step()
        return loss.item()
    
    def predict(self, X):
        with torch.no_grad():
            return torch.argmax(self.forward(X), axis=1)
        
    
    def evaluate(self, test_dataloader):
        correct = 0
        for X, Y in test_dataloader:
            y_pred = self.predict(X)
            correct += (Y==y_pred).sum()
        acc = correct/(len(test_dataloader)*16)
        print(f"Accuracy : {acc}")

In [7]:
model = Model([128, 64, 10])

In [8]:
from torch.utils.data import DataLoader

BATCH = 16

train_dataloader  = DataLoader(data_train, batch_size=BATCH, shuffle=True)
test_dataloader = DataLoader(data_test, batch_size=BATCH, shuffle=True)

In [9]:
from tqdm import tqdm

epochs = 10

for i in range(epochs):
    total_loss = 0
    for X, Y in tqdm(train_dataloader, desc=f"Fitting epoch {i}"):
        loss = model.fit(X, Y)
        total_loss += loss
    avg_loss = total_loss/len(train_dataloader)
    print(f"Epoch {i}: {avg_loss:.4f}")    

Fitting epoch 0: 100%|██████████| 3750/3750 [00:17<00:00, 209.46it/s]


Epoch 0: 2.1928


Fitting epoch 1: 100%|██████████| 3750/3750 [00:17<00:00, 215.99it/s]


Epoch 1: 1.9789


Fitting epoch 2: 100%|██████████| 3750/3750 [00:18<00:00, 207.18it/s]


Epoch 2: 1.8779


Fitting epoch 3: 100%|██████████| 3750/3750 [00:20<00:00, 186.93it/s]


Epoch 3: 1.8035


Fitting epoch 4: 100%|██████████| 3750/3750 [00:18<00:00, 202.18it/s]


Epoch 4: 1.7551


Fitting epoch 5: 100%|██████████| 3750/3750 [00:21<00:00, 171.22it/s]


Epoch 5: 1.7223


Fitting epoch 6: 100%|██████████| 3750/3750 [00:17<00:00, 216.22it/s]


Epoch 6: 1.6890


Fitting epoch 7: 100%|██████████| 3750/3750 [00:17<00:00, 211.21it/s]


Epoch 7: 1.6503


Fitting epoch 8: 100%|██████████| 3750/3750 [00:15<00:00, 234.81it/s]


Epoch 8: 1.6304


Fitting epoch 9: 100%|██████████| 3750/3750 [00:15<00:00, 236.73it/s]

Epoch 9: 1.6160





In [10]:
model.evaluate(test_dataloader)

Accuracy : 0.8651999831199646


In [11]:
path = "./mnist_model.pth"
torch.save(model, path)