In [1]:
import numpy as np

In [2]:
import os

# Fetch the project helper module once: the download is skipped when a file
# named utils.py is already listed in the current working directory.
# NOTE(review): the URL tracks the repository's `main` branch, so the helper
# code imported later can change between runs — consider pinning a commit.
if 'utils.py' not in os.listdir():
  !wget https://raw.githubusercontent.com/ChiThang-50Cent/text-recognizer-labs/main/base/utils.py

--2023-10-20 14:48:00--  https://raw.githubusercontent.com/ChiThang-50Cent/text-recognizer-labs/main/base/utils.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2454 (2.4K) [text/plain]
Saving to: ‘utils.py’


2023-10-20 14:48:00 (48.2 MB/s) - ‘utils.py’ saved [2454/2454]



In [3]:
import torch
import torchvision
import utils as f
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader

In [4]:
# Build the three EMNIST splits via the project helper (defined in utils.py,
# not shown in this notebook). The recorded output of this cell reports
# 101520 training, 11280 validation and 18800 test samples.
train_set, valid_set, test_set = f.get_EMNIST_datasets()

Downloading https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip to ./EMNIST/raw/gzip.zip


100%|██████████| 561753746/561753746 [00:34<00:00, 16443212.57it/s]


Extracting ./EMNIST/raw/gzip.zip to ./EMNIST/raw
train_size: 101520, valid_size: 11280, test_size: 18800


In [5]:
# Seed through the project helper before the data loaders and the model are
# created further down.
# NOTE(review): which RNGs set_all_seed covers is defined in utils.py — confirm.
# NOTE(review): the dataset split cell runs before this seed is set; if
# get_EMNIST_datasets splits randomly, that split is not covered — verify.
f.set_all_seed(42)

# Device chosen by the project helper; the recorded run printed "cuda".
device = f.get_device()
print(device)

cuda


In [6]:
# Mini-batch size shared by all three loaders.
batch_size = 64

# Only the training split is shuffled; validation and test are iterated in
# their stored order.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
valid_loader, test_loader = (
    DataLoader(dataset=split, batch_size=batch_size, shuffle=False)
    for split in (valid_set, test_set)
)

In [7]:
# Sanity-check the label space of the test split: number of distinct classes
# plus the smallest and largest label id.
# The label batches are gathered first and concatenated once. This avoids
# re-copying a growing array on every iteration and keeps the labels' integer
# dtype (seeding the accumulator with an empty float array promoted every
# label to float64, which is why the ids used to print as 0.0 / 46.0).
label_batches = [y.numpy() for _, y in test_loader]
check = np.concatenate(label_batches, axis=None)

print(len(np.unique(check)), check.min(), check.max())

47 0.0 46.0


In [8]:
class ConvBlock(nn.Module):
  """A 3x3 convolution followed by a ReLU.

  The convolution uses stride 1 and padding 1, so height and width of the
  input are preserved; only the channel count changes from
  ``input_channel`` to ``output_channel``.
  """

  def __init__(self, input_channel, output_channel):
    super().__init__()
    self.conv = nn.Conv2d(
        in_channels=input_channel,
        out_channels=output_channel,
        kernel_size=3,
        stride=1,
        padding=1,
    )
    self.relu = nn.ReLU()

  def forward(self, x):
    """Apply the convolution, then the activation, and return the result."""
    return self.relu(self.conv(x))

In [9]:
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers wrapped by a skip connection.

    Args:
        in_dim: number of channels of the input feature map.
        out_dim: number of channels produced by the block.
        stride: stride of the first convolution (default 1).

    When ``in_dim == out_dim`` and ``stride == 1`` the skip path is the
    identity and adds no parameters. Otherwise the input is projected with a
    strided 1x1 convolution + batch norm so that its shape matches the main
    path before the two are added.
    """

    def __init__(self, in_dim, out_dim, stride=1) -> None:
        super().__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_dim, out_dim, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_dim),
            nn.ReLU()
        )

        # conv2 consumes conv1's output, so its input width is out_dim
        # (using in_dim here fails as soon as in_dim != out_dim).
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_dim, out_dim, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_dim),
        )

        # Skip path: identity when shapes already agree, otherwise a 1x1
        # projection that changes channels and/or spatial size to match.
        if stride != 1 or in_dim != out_dim:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_dim, out_dim, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_dim),
            )
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return relu(conv2(conv1(x)) + shortcut(x))."""
        residual = self.shortcut(x)

        out = self.conv1(x)
        out = self.conv2(out)
        # Out-of-place add so the tensor produced by the last batch norm is
        # not overwritten.
        out = out + residual

        return nn.functional.relu(out)

In [10]:
class CNN(nn.Module):
  """Small classifier: conv -> 2x2 max-pool -> residual block -> dropout -> two linear layers.

  Args:
    img_size: side length of the input image (the flattened size is computed
      as if the image were square).
    input_dim: number of input channels.
    num_classes: length of the output score vector.
    conv_dim: channel width of the convolutional stages.
    fc_dim: width of the hidden fully-connected layer.
  """

  def __init__(self, img_size, input_dim, num_classes, conv_dim, fc_dim):
    super().__init__()
    self.conv1 = ConvBlock(input_dim, conv_dim)
    self.residual = ResidualBlock(conv_dim, conv_dim)
    self.dropout = nn.Dropout(p=0.25)
    self.maxpool = nn.MaxPool2d(kernel_size=2)

    # The single 2x2 pooling step halves each spatial side; the conv and
    # residual stages keep the spatial size unchanged.
    pooled_side = img_size // 2
    flat_features = pooled_side * pooled_side * conv_dim
    self.fc1 = nn.Linear(flat_features, fc_dim)
    self.fc2 = nn.Linear(fc_dim, num_classes)

  def forward(self, x):
    """Return unnormalised class scores, one row per input sample."""
    features = self.maxpool(self.conv1(x))
    features = self.dropout(self.residual(features))

    # Keep the batch dimension and flatten everything after it.
    flat = torch.flatten(features, start_dim=1)
    hidden = nn.functional.relu(self.fc1(flat))
    return self.fc2(hidden)

In [11]:
# Hyperparameters passed to CNN(...) when the model is built.

# Input side length; 28 // 2 = 14 after the max-pool, so fc1 receives
# 14 * 14 * 64 = 12544 features.
img_size = 28
# Width of the hidden fully-connected layer.
fc_dim = 128
# Channel width of the conv and residual stages.
conv_dim = 64
# Matches the 47 distinct labels (0..46) found in the test split check.
num_classes = 47
# Number of input channels.
input_dim = 1

In [12]:
# Build the network and move it to the device selected earlier in the
# notebook, rather than calling .cuda() unconditionally (which raises on a
# machine without a usable GPU). .to() returns the module, so the cell still
# displays the model summary as its output.
CNN_model = CNN(img_size, input_dim, num_classes, conv_dim, fc_dim)
CNN_model.to(device)

CNN(
  (conv1): ConvBlock(
    (conv): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
  )
  (residual): ResidualBlock(
    (conv1): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (conv2): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (dropout): Dropout(p=0.25, inplace=False)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=12544, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=47, bias=True)
)

In [13]:
# Objective: cross-entropy computed on the raw class scores of the model.
loss_fn = nn.CrossEntropyLoss()

# SGD with momentum and weight decay over all model parameters.
optimizer = optim.SGD(
    CNN_model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=0.001,
)

# Step schedule: the learning rate is multiplied by 0.1 at each milestone.
# NOTE(review): the training call in this notebook passes scheduler=None, so
# this object is currently unused — confirm whether that is intended.
scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[10, 20, 30],
    gamma=0.1,
)

In [None]:
# Arguments for the project training helper (training_loop in utils.py, not
# shown in this notebook), gathered in one place so they are easy to inspect.
# NOTE(review): scheduler is None here although a MultiStepLR instance named
# `scheduler` is created earlier — confirm the schedule is meant to be off.
train_kwargs = dict(
    n_epochs=30,
    model=CNN_model,
    train_loader=train_loader,
    val_loader=valid_loader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    scheduler=None,
    device=device,
)

f.training_loop(**train_kwargs)

In [None]:
# Collect predictions and ground-truth labels for the whole test split.
# eval() puts dropout and batch norm into inference mode. The model is kept
# on `device` and each batch is moved to it, instead of moving the whole
# model to the CPU (which slowed inference and left the model on the CPU for
# any later cell).
CNN_model.eval()
CNN_model.to(device)

y_pred = []
y_true = []

with torch.no_grad():
  for X, y in test_loader:

    logits = CNN_model(X.to(device))

    # softmax is monotonic, so argmax over the raw scores selects the same
    # class as argmax over the probabilities; the normalisation is skipped.
    y_pred.append(logits.argmax(dim=1).cpu().numpy())
    y_true.append(y.numpy())

# Flatten the per-batch arrays into one 1-D array each.
y_pred = np.concatenate(y_pred, axis=None)
y_true = np.concatenate(y_true, axis=None)

In [None]:
from sklearn.metrics import classification_report

# Text summary of the test-set metrics, built from the true and predicted
# label arrays collected in the evaluation cell.
report = classification_report(y_true, y_pred)
print(report)