In [3]:
from model import *
from utils import *

In [2]:
# Root folder of the dataset; the cells below read its 'train' and 'test' subfolders.
PATH = './CCPD2019-dl1'

# os.listdir returns entries in arbitrary, platform-dependent order. Sorting
# (and skipping hidden files such as '.DS_Store') makes photos[num] refer to
# the same image on every run and machine.
photos = sorted(
    name for name in os.listdir(PATH+'/test') if not name.startswith('.')
)

In [4]:
# Let's show our data: load one test photo, print the label parsed from its
# file name, then resize it and convert it to grayscale.
num = 0
img_path = PATH+'/test/'+photos[num]
img = cv.imread(img_path)
# cv.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of an AttributeError on img.shape.
if img is None:
    raise FileNotFoundError(f'could not read image: {img_path}')
# Label = text between the first '-' and the first following '.', minus its first character.
print(photos[num].split('-')[1].split('.')[0][1:])
print('original shape', img.shape)

# resizing (cv.resize takes the target size as (width, height))
dim = (256, 128)
img_resized = cv.resize(img, dim, interpolation = cv.INTER_AREA)
print('resised shape', img_resized.shape)

# converting to grayscale (single channel)
img_grey = cv.cvtColor(img_resized, cv.COLOR_BGR2GRAY)
print('black/white shape', img_grey.shape)

# show pic
cv.imshow('image', img_grey)
# HighGUI only draws the window while waitKey is pumping events; without it
# the window stays blank and can hang the kernel. Press any key to continue.
cv.waitKey(0)
cv.destroyAllWindows()

AYX440
original shape (92, 217, 3)
resised shape (128, 256, 3)
black/white shape (128, 256)


In [5]:
# data: build the train and test sets with get_data (train first, then test)

train_dir = f'{PATH}/train/'
test_dir = f'{PATH}/test/'

data_train = get_data(train_dir)
data_test = get_data(test_dir)

100%|██████████| 199981/199981 [01:00<00:00, 3329.07it/s]
100%|██████████| 9999/9999 [00:03<00:00, 2754.04it/s]


In [6]:
# creating DataLoaders

from torch.utils.data import DataLoader

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(data_train, batch_size=128, shuffle=True)
# The evaluation loader is NOT shuffled: the metric cells below average
# per-batch scores, and the last batch is smaller than the others, so a
# shuffled loader makes the reported numbers change from run to run.
test_dataloader = DataLoader(data_test, batch_size=128, shuffle=False)

# Number of batches in each loader.
len(train_dataloader), len(test_dataloader)

(1563, 79)

In [7]:
# Sanity check: pull a single batch and print the shape of one image and of
# its label. Nothing is printed if the loader yields no batches.
first_batch = next(iter(train_dataloader), None)
if first_batch is not None:
    x, y = first_batch
    num = 2
    print(x[num].shape)
    print(y[num].shape)

torch.Size([128, 256])
torch.Size([6])


In [7]:
# Hyperparameters passed to the CRNN constructor.
input_size = 512
hidden_size = 256
num_layers = 2
# One class more than the character set.
output_size = 1 + len(available_characters) # output_size = 37
# Learning rate for the optimizer.
lr = 0.5 * 10 ** (-4)

# Model, loss and optimizer used by the training cell.
model = CRNN(input_size, hidden_size, num_layers, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=lr)

In [9]:
# Report the value returned by count_parameters with thousands separators.
trainable_params = count_parameters(model)
print(f'The model has {trainable_params:,} trainable parameters')

The model has 8,647,397 trainable parameters


In [9]:
# Everything in this notebook runs on the CPU.
device = 'cpu'
model = model.to(device=device)

In [None]:
# Training: run N_EPOCHS epochs and keep the weights with the lowest
# validation loss on disk.
# NOTE(review): train / evaluate / epoch_time are not defined in this notebook;
# presumably they come from the star imports at the top.

import math
import time

# Per-epoch losses, appended at the end of every epoch.
train_history = []
valid_history = []

N_EPOCHS = 2
CLIP = 5

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss = train(
        model, device, train_dataloader, optimizer, criterion,
        CLIP, train_history, valid_history,
    )
    valid_loss = evaluate(model, device, test_dataloader, criterion)
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # Checkpoint only when this epoch beats the best validation loss so far.
    is_best = valid_loss < best_valid_loss
    if is_best:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), './best-val-model.pt')

    train_history.append(train_loss)
    valid_history.append(valid_loss)

    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

In [8]:
# Let's load our pretrained weights

# map_location='cpu' lets a checkpoint that was saved from a GPU run be
# deserialized on a CPU-only machine; load_state_dict then copies the tensors
# into the model's existing parameters.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True where the installed PyTorch supports it).
model.load_state_dict(torch.load('./model_1.pth', map_location='cpu'))
# Switch to inference mode; as the cell's last expression this also displays the model.
model.eval()

CRNN(
  (cnn): CNN(
    (block_0): Sequential(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
      (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (6): ReLU()
    )
    (block_1): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
      (4): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True

In [11]:
# Let's check accuracy on test data

# Make sure the model is in inference mode even if this cell is run right
# after training.
model.eval()

acc = 0
# No gradients are needed for evaluation; disabling autograd avoids building
# the graph for every batch, which saves memory and time.
with torch.no_grad():
    for img, text in tqdm(test_dataloader):
        pred = model(img)
        acc += accuracy(pred, text)

# Unweighted mean of the per-batch scores.
# NOTE(review): assumes accuracy() returns a per-batch fraction; the last batch
# is smaller, so it is slightly over-weighted here. TODO confirm.
print('test accuracy ', round(acc/len(test_dataloader)*100, 2), '%')

100%|██████████| 79/79 [03:01<00:00,  2.29s/it]

test accuracy  97.51 %





In [13]:
# Let's check CER metric on test data

# Make sure the model is in inference mode even if this cell is run right
# after training.
model.eval()

cer_met = 0
# No gradients are needed for evaluation; disabling autograd avoids building
# the graph for every batch, which saves memory and time.
with torch.no_grad():
    for img, text in tqdm(test_dataloader):
        pred = model(img)
        cer_met += CER_metric(pred, text)

# Unweighted mean of the per-batch values; .item() is called on the accumulated
# sum, so CER_metric is expected to return a tensor.
# NOTE(review): a value near 100 % reads like a character-level accuracy
# (1 - CER) rather than an error rate; confirm what CER_metric returns.
print('test CER metric ', round(cer_met.item()/len(test_dataloader)*100, 2), '%')

100%|██████████| 79/79 [03:02<00:00,  2.31s/it]

test CER metric  99.44 %





In [14]:
# Let's check our model on the different photo

num = 0
path = './Photo/'
# os.listdir order is arbitrary; sort (and skip hidden files) so that
# photos_test[num] is the same image on every run and machine.
photos_test = sorted(
    name for name in os.listdir(path) if not name.startswith('.')
)
img = cv.imread(path+photos_test[num])
# cv.imread returns None instead of raising when the file cannot be decoded.
if img is None:
    raise FileNotFoundError(f'could not read image: {path+photos_test[num]}')
# The expected plate text is the file name without its extension.
print(photos_test[num].split('.')[0])
print('original shape', img.shape)
# Same preprocessing as for the dataset preview: resize to 256x128 (width, height), then grayscale.
dim = (256, 128)
img_resized = cv.resize(img, dim, interpolation = cv.INTER_AREA)
img_grey = cv.cvtColor(img_resized, cv.COLOR_BGR2GRAY)
print('new image shape', img_grey.shape)

# unsqueeze(0) adds a leading batch dimension of size 1; autograd is disabled
# because this is inference only.
with torch.no_grad():
    pred = model(torch.from_numpy(img_grey).float().unsqueeze(0))
print('pred shape', pred.shape)
print('pred text', decoder_text(pred)[0])

AVK308
original shape (116, 344, 3)
new image shape (128, 256)
pred shape torch.Size([1, 37, 6])
pred text avk308
