In [1]:
import os, json
from PIL import Image
import numpy as np

import torch
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.nn import CTCLoss

In [2]:
from data.dataset import TextDataset as TDataset
from data.data_utils import collate_fn
from trainer.train import train
from trainer.sequence_decoder import ctc_decode
from modeling.model_utils import load_model

from configs.config_crnn import train_config
from configs.dataconfig import (
    train_source, val_source, mapper, test_sources
)
from utils.augment import Augmentation

In [6]:
def define_crnn_model(cfg, num_class, reload_checkpoint = ''):
    from modeling.crnn import Crnn
    config = cfg

    crnn = Crnn(
        1, 
        num_class,
        map_to_seq_hidden=config['map_to_seq_hidden'],
        rnn_hidden=config['rnn_hidden'],
        leaky_relu=config['leaky_relu']
    )

    if reload_checkpoint:
        crnn = load_model(crnn, reload_checkpoint)
        print('model loaded successfully')

    return crnn

In [4]:
from BnTokenizer import TrieTokenizer
from BnTokenizer.base import BnGraphemizer

tokenizer = BnGraphemizer(
    tokenizer_class=TrieTokenizer,
    max_len=64,
    normalize_unicode=True,
    normalization_mode='NFKC',
    normalizer="unicode",
    printer=print
)

graphemes = json.load(open("graphemes.json", 'r'))
tokenizer.add_tokens(graphemes,reset_oov=True)

Selected Tokenizer: TrieTokenizer
Max Sequence Length: 64
Normalize Text: True
Normalizar: unicode
Normalization Mode: NFKC
update completed.[2143] new vocabs added. Current vocab count: 2145


In [7]:
device = torch.device(
    'cuda' if torch.cuda.is_available() else 'cpu'
)
model = define_crnn_model(train_config, len(tokenizer.vocab) + 1)
model.to(device)

Crnn(
  (features_extractor): FeatureExtractor(
    (feature_extractor): ModuleList(
      (0): Convolution(
        (conv): Sequential(
          (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
          (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
        )
      )
      (1): Convolution(
        (conv): Sequential(
          (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
          (3): MaxPool2d(kernel_size=(2, 1), stride=(2, 1), padding=0, dilation=1, ceil_mode=False)
        )
      )
      (2): Convolution(
        (conv): Sequential(
          (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNo

In [9]:
checkpoint =  torch.load(
    "/mnt/JaHiD/Zahid/RnD/TokenizerForBengaliTextRecognition/artifacts/crnn/CRNN+GRAPHEMIZER+BTHR+Boise/crnn_044500_loss_0.8612_acc_0.8996.pt"
)
model.load_state_dict(checkpoint['model'])
model.to(device)
model.eval()

Crnn(
  (features_extractor): FeatureExtractor(
    (feature_extractor): ModuleList(
      (0): Convolution(
        (conv): Sequential(
          (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
          (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
        )
      )
      (1): Convolution(
        (conv): Sequential(
          (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
          (3): MaxPool2d(kernel_size=(2, 1), stride=(2, 1), padding=0, dilation=1, ceil_mode=False)
        )
      )
      (2): Convolution(
        (conv): Sequential(
          (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNo

In [10]:
import pandas as pd
from utils.utils import levenshtein_distance
from trainer.evaluate import predict


def inference(
    cfg, model, inf_loader,tokenizer,
    decode_method='beam_search',
    beam_size=10,
    save_image=True,
    save_excel_report=True
):
    
    fun = lambda x: ''.join([tokenizer.vocab[i] for i in x]).replace('<oov>', '▁')
    prediciton_info = predict(
        model, inf_loader,tokenizer,
        decode_method=decode_method,
        beam_size=beam_size
    )
    (all_gts, all_preds, _, who_are_we) = prediciton_info

    report = pd.DataFrame.from_dict((
        {
            'GroundTruth':list(map(fun,all_gts)),
            'Prediction': list(map(fun,all_preds))
        }
    ))

    report['Edit Distance'] = list(
        map(lambda x: levenshtein_distance(*x,True),
        zip(report['GroundTruth'], report['Prediction']))
    )
    report['GT Length'] = [len(i) for i in report['GroundTruth']]
    report['Split'] = [i.split('|')[0] for i in who_are_we]
    report['Path'] =  [i.split('|')[-1] for i in who_are_we]

    if save_excel_report:
        saving_dir = f"{cfg['checkpoints_dir']}/report.checkpoints.{who_are_we[0].split('|')[0]}.test.xlsx"
        report.to_excel(
            f"{cfg['checkpoints_dir']}/report.checkpoints.{who_are_we[0].split('|')[0]}.test.textonly.xlsx"
        )

    if save_image:
        writer = pd.ExcelWriter(saving_dir, engine='xlsxwriter')
        report.to_excel(writer, sheet_name='Sheet1')
        workbook  = writer.book
        worksheet = writer.sheets['Sheet1']

        i = 2
        for img in report['Path']:
            worksheet.insert_image(f'H{i}', img)
            i += 1
        writer.save()

    return report

In [11]:
test_sources = {
    "boise_camera_test": {
        'data': '/home/jahid/Music/bn_dataset/boiseState/camera/split/test_annotaion.json',
        'base_dir': '/home/jahid/Music/bn_dataset/boiseState/camera/split/test_crop_images',
        'id': 'boise_camera_test'
    },
    "boise_scan_test": {
        'data': '/home/jahid/Music/bn_dataset/boiseState/scan/split/test_annotaion.json',
        'base_dir': '/home/jahid/Music/bn_dataset/boiseState/scan/split/test_crop_images',
        'id': 'boise_scan_test'
    },
    "boise_conjunct_test": {
        'data': '/home/jahid/Music/bn_dataset/boiseState/conjunct/split/test_annotaion.json',
        'base_dir': '/home/jahid/Music/bn_dataset/boiseState/conjunct/split/test_crop_images',
        'id': 'boise_conjunct_test'
    }
}


In [17]:
from data.data_source_controller import DataSourceController
for k, v in test_sources.items():
    if k not in  [
        # 'bn_htr_test',
        # 'boise_scan_test',
        # 'boise_camera_test',
        'boise_conjunct_test'
    ]: 
        continue
    process_text = lambda x : x.replace('\u200c','').replace("\u200d", '')
    val_data = DataSourceController(filter=lambda x: len(x.label)<30, transform= process_text)

    print(k,v.get('n'))
    val_data.add_data(**v)

    val_dataset = TDataset(
        val_data.data,
        tokenizer,
        img_height= 32,
        img_width= 128
    )

    val_dataloader = DataLoader(
        val_dataset, 
        batch_size= train_config['train_batch_size'], 
        collate_fn=collate_fn, 
        prefetch_factor = 1,
        num_workers = 4
    )
    report = inference(
        train_config, model, val_dataloader,tokenizer,
        save_image=True
    )

boise_conjunct_test None
Out of 725 boise_conjunct_test,725 are kept after filtering
Total data 725
Total 725 Images found!!!
0/6 is in progress
1/6 is in progress
2/6 is in progress
3/6 is in progress
4/6 is in progress
5/6 is in progress


  writer.save()


In [19]:
# Save Misprediciton Log
import glob

for xl in sorted(glob.glob(f"{train_config['checkpoints_dir']}/*only.xlsx")):
    _report = pd.read_excel(xl, engine='openpyxl').fillna('')
    _report = _report[_report['Edit Distance']>0]

    writer = pd.ExcelWriter(xl.replace('.xlsx', '.error.xlsx'), engine='xlsxwriter')
    _report.to_excel(writer, sheet_name='Sheet1')
    workbook  = writer.book
    worksheet = writer.sheets['Sheet1']

    i = 2
    for img in _report['Path']:
        worksheet.insert_image(f'K{i}', img)
        i += 1
    writer.save()

  writer.save()


In [20]:
import torchmetrics
import pandas as pd
import glob


process_text = lambda x : x.replace('\u200c','')
for xl in sorted(glob.glob(f"{train_config['checkpoints_dir']}/*only.xlsx")):
    _report = pd.read_excel(xl).fillna('')
    try:
        _report['GroundTruth'] = _report['GroundTruth'].apply(process_text)
        _report['Prediction'] = _report['Prediction'].apply(process_text)
        print(xl.split('/')[-1])
        print("    Char Error Rate",torchmetrics.CharErrorRate()(_report['GroundTruth'], _report['Prediction']))
        print("    Word Error Rate",sum(_report['GroundTruth'] !=_report['Prediction'])/len(_report))
        print("    Word Accuracy",sum(_report['GroundTruth'] ==_report['Prediction'])/len(_report))
    except Exception as e:
        print(e)
        print(xl.split('/')[-1])

report.checkpoints.boise_conjunct_test.test.textonly.xlsx
    Char Error Rate tensor(0.0637)
    Word Error Rate 0.18068965517241378
    Word Accuracy 0.8193103448275862
