<a href="https://colab.research.google.com/github/BlankZer0/Bangla-OCR-CRNN/blob/main/Bangla_OCR_with_CRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount GDrive

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!unzip -qq "/content/drive/My Drive/Colab Notebooks/BanglaWriting Extracted.zip"

In [3]:
# import libraries

import os
import glob
from tqdm import tqdm
import numpy as np
from pprint import pprint

import albumentations

from PIL import Image
from PIL import ImageFile

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn import preprocessing
from sklearn import model_selection
from sklearn import metrics

### Config

In [4]:
DATA_DIR = "/content/BanglaWriting Extracted/converted/"
BATCH_SIZE = 1
IMAGE_WIDTH = 200
IMAGE_HEIGHT = 100
NUM_WORKERS = 8
EPOCHS = 30
LEARNING_RATE = 3e-4
DEVICE = "cuda"

ImageFile.LOAD_TRUNCATED_IMAGES = True

### Dataset

In [5]:
image_files = glob.glob(os.path.join(DATA_DIR, "*.jpg"))
targets_orig = [x.split("_")[-1][:-4] for x in image_files]
targets = [[c for c in x] for x in targets_orig]
targets_flat = [c for clist in targets for c in clist]

lbl_enc = preprocessing.LabelEncoder()
lbl_enc.fit(targets_flat)
targets_enc = [lbl_enc.transform(x) for x in targets]
targets_enc = np.array(targets_enc)
targets_enc += 1

print(targets_enc)
print()
print("number of unique classes: " + str(len(lbl_enc.classes_)))

[array([89, 94, 78, 91, 61, 85, 92, 59]) array([45])
 array([ 49,  89, 101,  81,  92,  59,  91,  85]) ... array([57])
 array([45]) array([81, 97, 85])]

number of unique classes: 115


In [6]:
# train-test split

(train_imgs, test_imgs, train_targets, 
 test_targets, _, test_targets_orig,) = model_selection.train_test_split(
    image_files, targets_enc, targets_orig, test_size=0.1, random_state=42
    )

In [7]:
class ClassificationDataset(Dataset):
    def __init__(self, image_paths, targets, resize=None):
        # resize = (height, width)
        self.image_paths = image_paths
        self.targets = targets
        self.resize = resize

        mean = (0.485, 0.456, 0.406)
        std = (0.229, 0.224, 0.225)
        self.aug = albumentations.Compose(
            [
                albumentations.Normalize(
                    mean, std, max_pixel_value=255.0, always_apply=True
                )
            ]
        )

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, item):
        image = Image.open(self.image_paths[item]).convert("RGB")
        targets = self.targets[item]

        if self.resize is not None:
            image = image.resize(
                (self.resize[1], self.resize[0]), resample=Image.BILINEAR
            )

        image = np.array(image)
        augmented = self.aug(image=image)
        image = augmented["image"]
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        return {
            "images": torch.tensor(image, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.long),
        }


### Engine

In [8]:
def train_fn(model, data_loader, optimizer):
    model.train()
    fin_loss = 0
    tk0 = tqdm(data_loader, total=len(data_loader))
    for data in tk0:
        for key, value in data.items():
            data[key] = value.to(DEVICE)
        optimizer.zero_grad()
        _, loss = model(**data)
        loss.backward()
        optimizer.step()
        fin_loss += loss.item()
    return fin_loss / len(data_loader)


def eval_fn(model, data_loader):
    model.eval()
    fin_loss = 0
    fin_preds = []
    with torch.no_grad():
        tk0 = tqdm(data_loader, total=len(data_loader))
        for data in tk0:
            for key, value in data.items():
                data[key] = value.to(DEVICE)
            batch_preds, loss = model(**data)
            fin_loss += loss.item()
            fin_preds.append(batch_preds)
        return fin_preds, fin_loss / len(data_loader)


### Model

In [9]:
class OCRModel(nn.Module):
    def __init__(self, num_chars):
        super(OCRModel, self).__init__()
        self.conv_1 = nn.Conv2d(3, 128, kernel_size=(3, 6), padding=(1, 1))
        self.pool_1 = nn.MaxPool2d(kernel_size=(2, 2))
        self.conv_2 = nn.Conv2d(128, 64, kernel_size=(3, 6), padding=(1, 1))
        self.pool_2 = nn.MaxPool2d(kernel_size=(2, 2))
        self.linear_1 = nn.Linear(1600, 64)
        self.drop_1 = nn.Dropout(0.2)
        self.lstm = nn.GRU(64, 32, bidirectional=True, num_layers=2, dropout=0.25, batch_first=True)
        self.output = nn.Linear(64, num_chars + 1)

    def forward(self, images, targets=None):
        bs, channel, height, width = images.size()
        x = F.relu(self.conv_1(images))
        # print(x.size())
        x = self.pool_1(x)
        # print(x.size())
        x = F.relu(self.conv_2(x))
        # print(x.size())
        x = self.pool_2(x)
        # print(x.size())
        x = x.permute(0, 3, 1, 2)
        # print(x.size())
        x = x.view(bs, x.size(1), -1)
        # print(x.size())
        x = F.relu(self.linear_1(x))
        # print(x.size())
        x = self.drop_1(x)
        # print(x.size())
        x, _ = self.lstm(x)
        # print(x.size())
        x = self.output(x)
        # print(x.size())
        x = x.permute(1, 0, 2)
        # print(x.size())

        if targets is not None:
            log_probs = F.log_softmax(x, 2)
            # print(log_probs.size())
            input_lengths = torch.full(
                size=(bs,), fill_value=log_probs.size(0), dtype=torch.int32
            )
            # print(input_lengths)
            target_lengths = torch.full(
                size=(bs,), fill_value=targets.size(1), dtype=torch.int32
            )
            # print(target_lengths)
            loss = nn.CTCLoss(blank=0)(
                log_probs, targets, input_lengths, target_lengths
            )
            return x, loss

        return x, None


In [10]:
model = OCRModel(len(lbl_enc.classes_))
img = torch.rand((16, 3, IMAGE_HEIGHT, IMAGE_WIDTH))
target = torch.randint(1, len(lbl_enc.classes_), (16, 10))
x, loss = model(img, target)

### Train

In [11]:
def remove_duplicates(x):
    if len(x) < 2:
        return x
    fin = ""
    for j in x:
        if fin == "":
            fin = j
        else:
            if j == fin[-1]:
                continue
            else:
                fin = fin + j
    return fin


def decode_predictions(preds, encoder):
    preds = preds.permute(1, 0, 2)
    preds = torch.softmax(preds, 2)
    preds = torch.argmax(preds, 2)
    preds = preds.detach().cpu().numpy()
    cap_preds = []
    for j in range(preds.shape[0]):
        temp = []
        for k in preds[j, :]:
            k = k - 1
            if k == -1:
                temp.append("§")
            else:
                p = encoder.inverse_transform([k])[0]
                temp.append(p)
        tp = "".join(temp).replace("§", "")
        cap_preds.append(remove_duplicates(tp))
    return cap_preds


def run_training():

    train_dataset = ClassificationDataset(
        image_paths=train_imgs,
        targets=train_targets,
        resize=(IMAGE_HEIGHT, IMAGE_WIDTH),
    )
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        shuffle=True,
    )
    test_dataset = ClassificationDataset(
        image_paths=test_imgs,
        targets=test_targets,
        resize=(IMAGE_HEIGHT, IMAGE_WIDTH),
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        shuffle=False,
    )

    model = OCRModel(num_chars=len(lbl_enc.classes_))
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=0.8, patience=5, verbose=True
    )
    for epoch in range(EPOCHS):
        train_loss = train_fn(model, train_loader, optimizer)
        valid_preds, test_loss = eval_fn(model, test_loader)
        valid_captcha_preds = []
        for vp in valid_preds:
            current_preds = decode_predictions(vp, lbl_enc)
            valid_captcha_preds.extend(current_preds)
        combined = list(zip(test_targets_orig, valid_captcha_preds))

        pprint(combined[:10])
        test_dup_rem = [remove_duplicates(c) for c in test_targets_orig]
        accuracy = metrics.accuracy_score(test_dup_rem, valid_captcha_preds)
        
        print(
            f"Epoch={epoch}, Train Loss={train_loss}, Test Loss={test_loss} Accuracy={accuracy}"
        )
        scheduler.step(test_loss)


In [12]:
run_training()

100%|██████████| 18418/18418 [06:10<00:00, 49.65it/s]
100%|██████████| 2047/2047 [00:17<00:00, 113.93it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'ক'),
 ('বলছ', 'এ'),
 ('বয়সের', 'প'),
 (',', '।'),
 ('হয়', 'হ'),
 ('পড়েন', 'ক'),
 ('ভাষার', 'ক'),
 ('হয়', 'হ'),
 ('নিজেদের', 'প'),
 ('মুল', 'এ')]
Epoch=0, Train Loss=3.742495104861461, Test Loss=3.1631727629847566 Accuracy=0.07816316560820713


100%|██████████| 18418/18418 [06:14<00:00, 49.17it/s]
100%|██████████| 2047/2047 [00:17<00:00, 114.19it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'এ'),
 ('বলছ', 'ক'),
 ('বয়সের', 'আ'),
 (',', ','),
 ('হয়', 'হ'),
 ('পড়েন', 'আ'),
 ('ভাষার', 'আ'),
 ('হয়', 'ক'),
 ('নিজেদের', 'ব'),
 ('মুল', 'ক')]
Epoch=1, Train Loss=3.0376313227390734, Test Loss=2.8654288511685064 Accuracy=0.09574987787005373


100%|██████████| 18418/18418 [06:16<00:00, 48.89it/s]
100%|██████████| 2047/2047 [00:17<00:00, 116.25it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'সে'),
 ('বলছ', 'ক'),
 ('বয়সের', 'কের'),
 (',', ','),
 ('হয়', 'হ'),
 ('পড়েন', 'নরে'),
 ('ভাষার', 'আর'),
 ('হয়', 'হ'),
 ('নিজেদের', 'পের'),
 ('মুল', 'সে')]
Epoch=2, Train Loss=2.7773310674193836, Test Loss=2.6178143108532046 Accuracy=0.10503175378602833


100%|██████████| 18418/18418 [06:12<00:00, 49.45it/s]
100%|██████████| 2047/2047 [00:17<00:00, 113.88it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'সত'),
 ('বলছ', 'ক'),
 ('বয়সের', 'কসর'),
 (',', ','),
 ('হয়', 'হ'),
 ('পড়েন', 'পর'),
 ('ভাষার', 'আমার'),
 ('হয়', 'হ'),
 ('নিজেদের', 'সতেকের'),
 ('মুল', 'হল')]
Epoch=3, Train Loss=2.45452326817689, Test Loss=2.2161619276195967 Accuracy=0.1446018563751832


100%|██████████| 18418/18418 [06:13<00:00, 49.31it/s]
100%|██████████| 2047/2047 [00:17<00:00, 115.97it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'সতে'),
 ('বলছ', 'ক'),
 ('বয়সের', 'কয়খের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'দেন'),
 ('ভাষার', 'আমার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'হতেকের'),
 ('মুল', 'হল')]
Epoch=4, Train Loss=2.166253734304259, Test Loss=1.985967705594071 Accuracy=0.1695163654127992


100%|██████████| 18418/18418 [06:22<00:00, 48.21it/s]
100%|██████████| 2047/2047 [00:18<00:00, 108.46it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'সতে'),
 ('বলছ', 'কলন'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'দয়েন'),
 ('ভাষার', 'তামার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'িনেকের'),
 ('মুল', 'হল')]
Epoch=5, Train Loss=1.963357761251535, Test Loss=1.8090614897422366 Accuracy=0.1968734733756717


100%|██████████| 18418/18418 [06:22<00:00, 48.19it/s]
100%|██████████| 2047/2047 [00:17<00:00, 115.81it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'সয়ে'),
 ('বলছ', 'কলন'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পয়েন'),
 ('ভাষার', 'তামার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'িতেকের'),
 ('মুল', 'সুন')]
Epoch=6, Train Loss=1.7894573214226532, Test Loss=1.6632613720673508 Accuracy=0.2256961406936981


100%|██████████| 18418/18418 [06:16<00:00, 48.90it/s]
100%|██████████| 2047/2047 [00:17<00:00, 116.10it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'সয়ে'),
 ('বলছ', 'এল'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পয়েন'),
 ('ভাষার', 'তামার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'নিনেকের'),
 ('মুল', 'সল')]
Epoch=7, Train Loss=1.6488516861431277, Test Loss=1.5780595854774635 Accuracy=0.23839765510503175


100%|██████████| 18418/18418 [06:20<00:00, 48.42it/s]
100%|██████████| 2047/2047 [00:18<00:00, 113.64it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'সয়ো'),
 ('বলছ', 'কনয'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'দয়েন'),
 ('ভাষার', 'তামার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'নিনেকের'),
 ('মুল', 'সুন')]
Epoch=8, Train Loss=1.5376965445325261, Test Loss=1.4411787321444367 Accuracy=0.2804103566194431


100%|██████████| 18418/18418 [06:19<00:00, 48.56it/s]
100%|██████████| 2047/2047 [00:18<00:00, 113.62it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পয়ে'),
 ('বলছ', 'বল'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'দয়েন'),
 ('ভাষার', 'তামার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'িলেকের'),
 ('মুল', 'মুন')]
Epoch=9, Train Loss=1.455620421665157, Test Loss=1.42578380778259 Accuracy=0.2882266731802638


100%|██████████| 18418/18418 [06:28<00:00, 47.39it/s]
100%|██████████| 2047/2047 [00:18<00:00, 108.16it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পড়ে'),
 ('বলছ', 'বলই'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'তামার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'নতেকের'),
 ('মুল', 'সুন')]
Epoch=10, Train Loss=1.3839109969150443, Test Loss=1.3683795413086768 Accuracy=0.30532486565705913


100%|██████████| 18418/18418 [06:30<00:00, 47.15it/s]
100%|██████████| 2047/2047 [00:18<00:00, 110.54it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পড়োখ'),
 ('বলছ', 'বল্হ'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'তাখার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'নিতেদের'),
 ('মুল', 'মুন')]
Epoch=11, Train Loss=1.3293671526404998, Test Loss=1.2831640635413706 Accuracy=0.3277967757694187


100%|██████████| 18418/18418 [06:26<00:00, 47.60it/s]
100%|██████████| 2047/2047 [00:18<00:00, 110.82it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পড়ো'),
 ('বলছ', 'এলছ'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'ভামার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'িদেদের'),
 ('মুল', 'সুন')]
Epoch=12, Train Loss=1.2800789083065784, Test Loss=1.2404287703109849 Accuracy=0.3419638495359062


100%|██████████| 18418/18418 [06:20<00:00, 48.39it/s]
100%|██████████| 2047/2047 [00:18<00:00, 113.32it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পজে'),
 ('বলছ', 'এল'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'তাখার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'য়িদেদের'),
 ('মুল', 'মুন')]
Epoch=13, Train Loss=1.240614474010309, Test Loss=1.2351978921616276 Accuracy=0.336590131900342


100%|██████████| 18418/18418 [06:20<00:00, 48.40it/s]
100%|██████████| 2047/2047 [00:17<00:00, 114.80it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'সড়েক'),
 ('বলছ', 'এলছ'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'ভাখার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'নিদেদের'),
 ('মুল', 'মুন')]
Epoch=14, Train Loss=1.2005854414506945, Test Loss=1.2065355132495903 Accuracy=0.3522227650219834


100%|██████████| 18418/18418 [06:19<00:00, 48.48it/s]
100%|██████████| 2047/2047 [00:18<00:00, 112.47it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'সড়েো'),
 ('বলছ', 'বল্ছ'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'তামার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'লিজেদের'),
 ('মুল', 'মুল')]
Epoch=15, Train Loss=1.1738421946239004, Test Loss=1.1999322434098032 Accuracy=0.3512457254518808


100%|██████████| 18418/18418 [06:18<00:00, 48.63it/s]
100%|██████████| 2047/2047 [00:18<00:00, 112.18it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পড়ে'),
 ('বলছ', 'বলই'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'তাসার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'লিদেদের'),
 ('মুল', 'মন')]
Epoch=16, Train Loss=1.1476673190475957, Test Loss=1.1857868104675602 Accuracy=0.35613092330239376


100%|██████████| 18418/18418 [06:23<00:00, 48.02it/s]
100%|██████████| 2047/2047 [00:18<00:00, 112.95it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পড়ো'),
 ('বলছ', 'বলই'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'ভাষার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'য়িতেকের'),
 ('মুল', 'মুন')]
Epoch=17, Train Loss=1.1246704081151173, Test Loss=1.142661780007816 Accuracy=0.36492427943331707


100%|██████████| 18418/18418 [06:20<00:00, 48.36it/s]
100%|██████████| 2047/2047 [00:18<00:00, 113.36it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পড়েো'),
 ('বলছ', 'বল'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'ভাখার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'য়িজেদের'),
 ('মুল', 'মুন')]
Epoch=18, Train Loss=1.099388851817423, Test Loss=1.1358321789877546 Accuracy=0.3795798729848559


100%|██████████| 18418/18418 [06:19<00:00, 48.52it/s]
100%|██████████| 2047/2047 [00:18<00:00, 110.54it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'স্ড়ে'),
 ('বলছ', 'বলই'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'ভাষার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'য়িজেদের'),
 ('মুল', 'মুন')]
Epoch=19, Train Loss=1.0745008823579327, Test Loss=1.1195791074945276 Accuracy=0.39081582804103565


100%|██████████| 18418/18418 [06:19<00:00, 48.49it/s]
100%|██████████| 2047/2047 [00:18<00:00, 113.38it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'এ্জেক'),
 ('বলছ', 'বল'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'ভাখার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'লিজেকের'),
 ('মুল', 'মুন')]
Epoch=20, Train Loss=1.0564828647767306, Test Loss=1.132181864571346 Accuracy=0.37322911577918905


100%|██████████| 18418/18418 [06:19<00:00, 48.57it/s]
100%|██████████| 2047/2047 [00:18<00:00, 112.79it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পড়েকে'),
 ('বলছ', 'বল'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'সড়েন'),
 ('ভাষার', 'ভাষার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'রিজেদের'),
 ('মুল', 'মুন')]
Epoch=21, Train Loss=1.0406780027204314, Test Loss=1.1567621041804395 Accuracy=0.3751831949193942


100%|██████████| 18418/18418 [06:18<00:00, 48.66it/s]
100%|██████████| 2047/2047 [00:17<00:00, 114.31it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'প্ভেক'),
 ('বলছ', 'বলং'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'দড়েন'),
 ('ভাষার', 'ভাখার'),
 ('হয়', 'হয়া'),
 ('নিজেদের', 'য়িদেদের'),
 ('মুল', 'মুন')]
Epoch=22, Train Loss=1.028300655909156, Test Loss=1.0820261191270297 Accuracy=0.4064484611626771


100%|██████████| 18418/18418 [06:17<00:00, 48.75it/s]
100%|██████████| 2047/2047 [00:18<00:00, 111.83it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পভে'),
 ('বলছ', 'বলছ'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'ভাখার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'য়দেদের'),
 ('মুল', 'মুন')]
Epoch=23, Train Loss=1.0145708909022104, Test Loss=1.0962495447584208 Accuracy=0.3873961895456766


100%|██████████| 18418/18418 [06:18<00:00, 48.65it/s]
100%|██████████| 2047/2047 [00:18<00:00, 112.74it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পড়ে'),
 ('বলছ', 'বলছ'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'ভাষার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'মিদেদের'),
 ('মুল', 'মুন')]
Epoch=24, Train Loss=1.004270894743933, Test Loss=1.0996740284877393 Accuracy=0.38006839276990717


100%|██████████| 18418/18418 [06:18<00:00, 48.66it/s]
100%|██████████| 2047/2047 [00:17<00:00, 115.63it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পভেক'),
 ('বলছ', 'বল'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'ভাষার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'থিদেদের'),
 ('মুল', 'মুন')]
Epoch=25, Train Loss=0.9856716930570911, Test Loss=1.1093385415996253 Accuracy=0.40107474352711286


100%|██████████| 18418/18418 [06:20<00:00, 48.34it/s]
100%|██████████| 2047/2047 [00:18<00:00, 112.20it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'পসেক'),
 ('বলছ', 'বলই'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'সড়েন'),
 ('ভাষার', 'ভাখার'),
 ('হয়', 'হয়া'),
 ('নিজেদের', 'য়িদেদের'),
 ('মুল', 'মুন')]
Epoch=26, Train Loss=0.983444421364764, Test Loss=1.0932773285094761 Accuracy=0.39521250610649733


100%|██████████| 18418/18418 [06:18<00:00, 48.61it/s]
100%|██████████| 2047/2047 [00:18<00:00, 113.70it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'প্ভেক'),
 ('বলছ', 'বলই'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'পড়েন'),
 ('ভাষার', 'তাষার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'নিদেদের'),
 ('মুল', 'মুন')]
Epoch=27, Train Loss=0.9629541397639394, Test Loss=1.1175920194193825 Accuracy=0.3839765510503175


100%|██████████| 18418/18418 [06:18<00:00, 48.71it/s]
100%|██████████| 2047/2047 [00:17<00:00, 114.22it/s]
  0%|          | 0/18418 [00:00<?, ?it/s]

[('প্রত্যেক', 'প্ভেক'),
 ('বলছ', 'বলহ'),
 ('বয়সের', 'বয়জের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'দড়েন'),
 ('ভাষার', 'ভাখার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'নিদেদের'),
 ('মুল', 'মুন')]
Epoch=28, Train Loss=0.9535652368432064, Test Loss=1.0260649916825 Accuracy=0.4157303370786517


100%|██████████| 18418/18418 [06:18<00:00, 48.60it/s]
100%|██████████| 2047/2047 [00:18<00:00, 109.68it/s]


[('প্রত্যেক', 'এভেক'),
 ('বলছ', 'বলই'),
 ('বয়সের', 'বয়সের'),
 (',', ','),
 ('হয়', 'হয়'),
 ('পড়েন', 'দড়েন'),
 ('ভাষার', 'ভাষার'),
 ('হয়', 'হয়'),
 ('নিজেদের', 'য়িদেদের'),
 ('মুল', 'মুন')]
Epoch=29, Train Loss=0.9440893969319104, Test Loss=1.0432584442257535 Accuracy=0.41914997557401074
