In [1]:
import numpy as np

In [2]:
import os

if 'utils.py' not in os.listdir():
  !wget https://raw.githubusercontent.com/ChiThang-50Cent/text-recognizer-labs/main/base/utils.py

--2023-10-19 17:51:59--  https://raw.githubusercontent.com/ChiThang-50Cent/text-recognizer-labs/main/base/utils.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2421 (2.4K) [text/plain]
Saving to: ‘utils.py’


2023-10-19 17:51:59 (30.0 MB/s) - ‘utils.py’ saved [2421/2421]



In [3]:
import torch
import torchvision
import utils as f
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader

In [4]:
a = torch.tensor([1, 2, 3])
b = torch.tensor([3, 4, 5])

a+b

tensor([4, 6, 8])

In [4]:
train_set, valid_set, test_set = f.get_EMNIST_datasets()

Downloading https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip to ./EMNIST/raw/gzip.zip


100%|██████████| 561753746/561753746 [00:10<00:00, 54783682.57it/s]


Extracting ./EMNIST/raw/gzip.zip to ./EMNIST/raw
train_size: 101520, valid_size: 11280, test_size: 18800


In [5]:
f.set_all_seed(42)

device = f.get_device()
print(device)

cuda


In [6]:
batch_size = 64

train_loader = DataLoader(train_set, batch_size, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size, shuffle=False)

In [7]:
class ConvBlock(nn.Module):
  def __init__(self, input_channel, output_channel):
    super().__init__()
    self.conv = nn.Conv2d(input_channel, output_channel, kernel_size=3, stride=1, padding=1)
    self.relu = nn.ReLU()

  def forward(self, x):
    c = self.conv(x)
    r = self.relu(c)

    return r

In [8]:
class CNN(nn.Module):
  def __init__(self, img_size, input_dim, num_classes, conv_dim, fc_dim):
    super().__init__()
    self.conv1 = ConvBlock(input_dim, conv_dim)
    self.conv2 = ConvBlock(conv_dim, conv_dim)
    self.dropout = nn.Dropout(0.25)
    self.maxpool = nn.MaxPool2d(2)

    conv_out_size = img_size // 2
    self.fc1 = nn.Linear(conv_out_size * conv_out_size * conv_dim, fc_dim)
    self.fc2 = nn.Linear(fc_dim, num_classes)

  def forward(self, x):

    x = self.conv1(x)
    x = self.conv2(x)
    x = self.maxpool(x)
    x = self.dropout(x)

    x = torch.flatten(x, 1)
    x = self.fc1(x)
    x = nn.functional.relu(x)
    x = self.fc2(x)

    return x

In [9]:
check = np.array([])
for x, y in test_loader:
  check = np.concatenate((check, y), axis=None)


In [10]:
for x, y in test_loader:
  print(x.shape)
  break

torch.Size([64, 1, 28, 28])


In [11]:
len(set(check)), min(check), max(check)

(47, 0.0, 46.0)

In [12]:
img_size = 28
fc_dim = 128
conv_dim = 64
num_classes = 47
input_dim = 1

In [13]:
CNN_model = CNN(img_size, input_dim, num_classes, conv_dim, fc_dim)
CNN_model.cuda()

CNN(
  (conv1): ConvBlock(
    (conv): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
  )
  (conv2): ConvBlock(
    (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
  )
  (dropout): Dropout(p=0.25, inplace=False)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=12544, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=47, bias=True)
)

In [14]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN_model.parameters(), lr=5e-2)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 30], gamma=0.1)

In [15]:
f.training_loop(n_epochs=50,
                model=CNN_model,
                train_loader=train_loader,
                val_loader=valid_loader,
                loss_fn=loss_fn,
                optimizer=optimizer,
                scheduler=scheduler,
                device=device)

Epoch 1 in 50 total. Train loss 4.27. Valid loss 3.86. Valid acc 0.02.
Epoch 2 in 50 total. Train loss 3.86. Valid loss 3.86. Valid acc 0.02.
Epoch 3 in 50 total. Train loss 3.85. Valid loss 3.84. Valid acc 0.03.
Epoch 4 in 50 total. Train loss 3.76. Valid loss 3.61. Valid acc 0.05.
Epoch 5 in 50 total. Train loss 3.09. Valid loss 2.21. Valid acc 0.43.
Epoch 6 in 50 total. Train loss 1.91. Valid loss 1.47. Valid acc 0.59.
Epoch 7 in 50 total. Train loss 1.50. Valid loss 1.22. Valid acc 0.65.
Epoch 8 in 50 total. Train loss 1.31. Valid loss 1.09. Valid acc 0.69.
Epoch 9 in 50 total. Train loss 1.18. Valid loss 0.99. Valid acc 0.71.
Epoch 10 in 50 total. Train loss 1.09. Valid loss 0.92. Valid acc 0.73.
Epoch 11 in 50 total. Train loss 1.02. Valid loss 0.86. Valid acc 0.74.
Epoch 12 in 50 total. Train loss 0.96. Valid loss 0.81. Valid acc 0.76.
Epoch 13 in 50 total. Train loss 0.91. Valid loss 0.77. Valid acc 0.77.
Epoch 14 in 50 total. Train loss 0.87. Valid loss 0.74. Valid acc 0.78.
E

In [16]:
CNN_model.eval()
CNN_model.cpu()

y_pred = []
y_true = []

with torch.no_grad():
  for X, y in test_loader:

    y_hat = CNN_model(X)

    y_pred.append(y_hat.softmax(dim=1).argmax(dim=1).numpy())
    y_true.append(y.numpy())

y_pred = np.concatenate(y_pred, axis=None)
y_true = np.concatenate(y_true, axis=None)

In [17]:
from sklearn.metrics import classification_report

print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.68      0.66      0.67       400
           1       0.52      0.73      0.61       400
           2       0.91      0.81      0.86       400
           3       0.95      0.96      0.96       400
           4       0.87      0.89      0.88       400
           5       0.92      0.86      0.89       400
           6       0.88      0.91      0.89       400
           7       0.92      0.95      0.94       400
           8       0.89      0.91      0.90       400
           9       0.64      0.82      0.72       400
          10       0.91      0.93      0.92       400
          11       0.91      0.93      0.92       400
          12       0.92      0.91      0.91       400
          13       0.88      0.92      0.90       400
          14       0.96      0.96      0.96       400
          15       0.64      0.60      0.62       400
          16       0.91      0.91      0.91       400
          17       0.90    