In [8]:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"

torch.__version__, device

('2.1.0+cu118', 'cuda')

In [9]:
import torchvision
from torchvision import datasets, transforms

from torch.utils.data import DataLoader

train_data = datasets.MNIST(
    root="data/",
    train=True,
    download=True,
    transform=transforms.ToTensor()
)

test_data = datasets.MNIST(
    root="data/",
    train=False,
    download=True,
    transform=transforms.ToTensor()
)

In [10]:
train_dataloader = DataLoader(dataset=train_data, shuffle=True,
                              batch_size=40, num_workers=2)
test_dataloader = DataLoader(dataset=test_data, batch_size=40, num_workers=2)

In [None]:
Data = torch.load("drive/MyDrive/_Model.pth")


In [11]:
from torch import nn

class NumberModule(nn.Module):
  def __init__(self, in_channel, hidden_units, out_units) -> None:
    super().__init__()

    self.Conv_Layer_1 = nn.Sequential(
        nn.Conv2d(in_channel, hidden_units, kernel_size=(2, 2), stride=1),
        nn.ReLU(),
        nn.Conv2d(hidden_units, hidden_units, kernel_size=(2, 2), stride=1),
        nn.ReLU(),
        nn.MaxPool2d(2, 2)
    )
    self.Conv_Layer_2 = nn.Sequential(
        nn.Conv2d(hidden_units, hidden_units, kernel_size=(2, 2), stride=1),
        nn.ReLU(),
        nn.Conv2d(hidden_units, hidden_units, kernel_size=(2, 2), stride=1),
        nn.ReLU(),
        nn.MaxPool2d(2, 2)
    )

    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(5*5*hidden_units, out_units)
    )

  def forward(self, x : torch.Tensor) -> torch.Tensor:
    x = self.Conv_Layer_1(x)
    # print(x.shape)
    x = self.Conv_Layer_2(x)
    # print(x.shape)
    x = self.classifier(x)
    # print(x.shape)

    return x

_Model = NumberModule(1, 10, 10)
_Model

NumberModule(
  (Conv_Layer_1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(2, 2), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(2, 2), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (Conv_Layer_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(2, 2), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(2, 2), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=250, out_features=10, bias=True)
  )
)

In [35]:
img, label = next(iter(test_dataloader))

img = img.to(device)
label = label.to(device)

y_pred = torch.softmax(_Model(img), dim=1).argmax(dim=1)

y_pred, label

(tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
         4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7, 2, 7, 1, 2, 1], device='cuda:0'),
 tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
         4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7, 2, 7, 1, 2, 1], device='cuda:0'))

In [23]:
Loss_FN = nn.CrossEntropyLoss()

_Optimizer = torch.optim.SGD(params=_Model.parameters(), lr=0.1)

In [26]:
_Epochs = 2

for i in range(_Epochs):
  _Model.train()

  train_Acc, train_loss = 0, 0

  for batch, (X, y) in enumerate(train_dataloader):
    X, y = X.to(device), y.to(device)

    y_pred = _Model(X)

    Loss = Loss_FN(y_pred, y)
    train_loss += Loss.item()

    y_pred_class = torch.softmax(y_pred, dim=1).argmax(dim=1)
    train_Acc += (y_pred_class == y).sum().item()/len(y_pred)

    _Optimizer.zero_grad()

    Loss.backward()

    _Optimizer.step()

    if batch % 400 == 0:
      print(f"Sampled: {batch*len(X)} samples")

  train_loss /= len(train_dataloader)
  train_Acc /= len(train_dataloader)

  test_loss, test_Acc = 0, 0

  _Model.eval()

  with torch.inference_mode():
    for batch, (X, y) in enumerate(test_dataloader):
      # Send data to target device
      X, y = X.to(device), y.to(device)

      # 1. Forward pass
      test_pred_logits = _Model(X)

      # 2. Calculate and accumulate loss
      loss = Loss_FN(test_pred_logits, y)
      test_loss += loss.item()

      # Calculate and accumulate accuracy
      test_pred_labels = test_pred_logits.argmax(dim=1)
      test_Acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

  test_Acc /= len(test_dataloader)
  test_loss /= len(test_dataloader)

  # 4. Print out what's happening
  print(
      f"Epoch: {i} | "
      f"train_loss: {train_loss:.4f} | "
      f"train_acc: {train_Acc:.4} % | "
      f"test_loss: {test_loss:.4f} | "
      f"test_acc: {test_Acc:.5f} %"
  )

Sampled: 0 samples
Sampled: 16000 samples
Sampled: 32000 samples
Sampled: 48000 samples
Epoch: 0 | train_loss: 0.0701 | train_acc: 0.978 % | test_loss: 0.0698 | test_acc: 0.97700 %
Sampled: 0 samples
Sampled: 16000 samples
Sampled: 32000 samples
Sampled: 48000 samples
Epoch: 1 | train_loss: 0.0646 | train_acc: 0.9796 % | test_loss: 0.0631 | test_acc: 0.98080 %


In [36]:
torch.save({
            'epoch': 5,
            'model_state_dict': _Model.state_dict(),
            'optimizer_state_dict': _Optimizer.state_dict,
            }, "drive/MyDrive/_Number.pth")