In [80]:
import argparse
import json
from typing import Tuple, List

import cv2
import editdistance
from path import Path
from model import Model
from preprocessor import Preprocessor
import os
import sys
from typing import List, Tuple

import numpy as np
import tensorflow as tf

from dataloader_iam import Batch
from dataloader_iam import DataLoaderIAM, Batch
from torch.autograd import Variable
import pandas as pd

# import ctcdecode


In [81]:
fn_char_list = '../model/charList.txt'
fn_summary = '../model/summary.json'
fn_corpus = '../data/corpus.txt'

In [82]:
decoder_type = 2

# train or validate on IAM dataset
batch_size=80000

loader = DataLoaderIAM(data_dir= Path('data/'), batch_size=batch_size, fast=False)
char_list = loader.char_list
# when in line mode, take care to have a whitespace in the char list
if  ' ' not in char_list:
    char_list = [' '] + char_list

# save words contained in dataset into file
open('data/corpus.txt', 'w').write(' '.join(loader.train_words + loader.validation_words))
print(loader.train_samples[5])

Ignoring known broken image: data/img/a01/a01-117/a01-117-05-02.png
Ignoring known broken image: data/img/r06/r06-022/r06-022-03-05.png
Sample(gt_text='lurched', file_path=Path('data/img/k04/k04-054/k04-054-07-05.png'))


In [83]:
validation_samples= loader.get_valid_samples() #This will output a tuple of two lists (img, labels)
train_samples=loader.get_train_samples() #This will output a tuple of two lists (img, labels)

In [84]:
len(validation_samples[0])
len(train_samples[0])

109552

In [85]:
line_mode=False
preprocessor = Preprocessor((128,32), line_mode=line_mode) #256 if lines, 128 if word
valid_samples_batch= Batch(validation_samples[0],validation_samples[1],len(validation_samples[0]))
finalized_validation= preprocessor.process_batch(valid_samples_batch)
validation_set_f= (finalized_validation[0],finalized_validation[1])


In [86]:
train_samples_batch= Batch(train_samples[0],train_samples[1],len(train_samples[0]))
finalized_training= preprocessor.process_batch(train_samples_batch)
training_set_f= (finalized_training[0],finalized_training[1])


In [87]:
def From_tuple_to_list(tuplez):
    validation_list= list()
    print(len(tuplez[0]))
    for i in range(len(tuplez[0])):
        validation_list.append((tuplez[0][i],tuplez[1][i]))
    return validation_list


validation_ready= From_tuple_to_list(validation_set_f)
training_ready= From_tuple_to_list(training_set_f)


5766
109552


In [88]:
training_loader= torch.utils.data.DataLoader(training_ready, batch_size=32, shuffle=True)
validation_loader= torch.utils.data.DataLoader(validation_ready, batch_size=32, shuffle=True)

# Model

In [89]:
import torch.nn as nn
import torch.nn.functional as F
import torch


class BasicBlock(nn.Module):

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class HTRNet(nn.Module):
    def __init__(self, nclasses):
        super(HTRNet, self).__init__()
        
        cnn_cfg = [(2, 32), 'M', (4, 64), 'M', (6, 128), 'M', (2, 256)]
        
        in_channels = 1
        self.features = nn.ModuleList([])
        cntm = 0
        cnt = 1
        for m in cnn_cfg:
            if m == 'M':
                self.features.add_module('mxp' + str(cntm), nn.MaxPool2d(kernel_size=2, stride=2))
                cntm += 1
            else:
                for i in range(m[0]):
                    x = m[1]
                    self.features.add_module('cnv' + str(cnt), nn.Conv2d(in_channels, x, 3, 1, 1, bias=True))
                    in_channels = x
                    self.features.add_module('nl' + str(cnt), nn.Sequential(nn.BatchNorm2d(x, momentum=.5), nn.ReLU()))
                    cnt += 1


        rnn_in = cnn_cfg[-1][-1]
        hidden=256
        num_layers =1
        self.rec = nn.LSTM(rnn_in, hidden, num_layers=num_layers, bidirectional=True)

        self.fnl = nn.Sequential(nn.Linear(2*hidden, 512), nn.ReLU(), nn.Dropout(.5), nn.Linear(512, nclasses))

    def forward(self, x):
        y = x
        for nn_module in self.features:
            y = nn_module(y)
        y = F.max_pool2d(y, [y.size(2), 1], padding=[0, 0])
        y = y.permute(2, 3, 0, 1)[0]  
        y = self.rec(y)[0]
        y = self.fnl(y)

        return y



In [91]:
classes = '_!"#&\'()*+,-./0123456789:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz '
max_epochs = 20
batch_size = 1
iter_size = 16
nlr = 1e-4
net = HTRNet(len(classes))

criterion = torch.nn.CTCLoss()

net_parameters = net.parameters()

optimizer = torch.optim.Adam(net_parameters, nlr, weight_decay=0.00005)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [int(.5 * max_epochs), int(.75 * max_epochs)])

# decoder = ctcdecode.CTCBeamDecoder([c for c in classes], beam_width=100)




In [92]:
def compute_accuracy(net, testloader):
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

In [94]:
iteration=[]
train_accu=[]
losses=[]
epochs_arr=[]
train_accu_per_epoch=[]
loss_per_epoch=[]
epochs = 30
net.train()
device= device = torch.device('cpu')
#device = torch.device('cpu')
for epoch in range(epochs):
    print("Epoch number:  ", epoch)
    for i, data in enumerate(training_loader, 0):
        images, labels = data
        # images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        output = net(images)
        output.to(device)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        iteration.append(i)
        train_accu.append(compute_accuracy(net,validation_loader))
        losses.append(loss)
        if i % 32 == 31:
            acc = compute_accuracy(net, validation_loader)
            print("Accuracy:  ", acc)
            net.train()
    epochs_arr.append(epoch)
    train_accu_per_epoch.append(acc)
    loss_per_epoch.append(loss)

Epoch number:   0


RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 3, 3], but got 3-dimensional input of size [32, 128, 32] instead

('bit', ',', 'Di', ',', '"')