## 1- Installing prerequisites and cloning the project

In [1]:
!pip install -q lmdb fire gdown
!git clone https://github.com/clovaai/deep-text-recognition-benchmark.git

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/87.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.8/297.8 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for fire (setup.py) ... [?25l[?25hdone
Cloning into 'deep-text-recognition-benchmark'...
remote: Enumerating objects: 499, done.[K
remote: Counting objects: 100% (225/225), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 499 (delta 208), reused 200 (delta 200), pack-reused 274 (from 1)[K
Receiving objects: 100% (499/499), 3.05 MiB | 6.90 MiB/s, done.
Resolving deltas: 100% (308/308), done.


## 2- Downloading pretrained weights and dataset

In [2]:
# دانلود مدل آموزش‌دیده
!gdown 1ubkg7E2vGEOqS4K_quwf9Vl-i8IVpklM  # مدل .pth
!gdown 1AL5Zsg2hDqcwF8ZmR0MJTbjgXIoE5W-I  # دوتا فایل zip

# باز کردن zip ها
!unzip -q /content/plate_img-train.zip -d /content/Dataset/
!unzip -q /content/plate_img-validation.zip -d /content/Dataset/


Downloading...
From (original): https://drive.google.com/uc?id=1ubkg7E2vGEOqS4K_quwf9Vl-i8IVpklM
From (redirected): https://drive.google.com/uc?id=1ubkg7E2vGEOqS4K_quwf9Vl-i8IVpklM&confirm=t&uuid=0c41c71a-e4c5-413b-9566-6146702c747b
To: /content/plate_img-train.zip
100% 196M/196M [00:04<00:00, 47.5MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1AL5Zsg2hDqcwF8ZmR0MJTbjgXIoE5W-I
From (redirected): https://drive.google.com/uc?id=1AL5Zsg2hDqcwF8ZmR0MJTbjgXIoE5W-I&confirm=t&uuid=7fb8498e-0c94-43d2-979b-a85ac19e7a09
To: /content/plate_img-validation.zip
100% 27.2M/27.2M [00:00<00:00, 63.1MB/s]


## 3- Preparing GT (Ground Truth) files for training

In [3]:
import os
import xml.etree.ElementTree as ET

train_dir = '/content/Dataset/train'
output_file = '/content/deep-text-recognition-benchmark/data/gt_train.txt'
os.makedirs(os.path.dirname(output_file), exist_ok=True)
lines = []

for filename in os.listdir(train_dir):
    if filename.endswith('.xml'):
        xml_path = os.path.join(train_dir, filename)
        tree = ET.parse(xml_path)
        root = tree.getroot()

        image_name = root.find('filename').text
        image_path = os.path.join('/content/deep-text-recognition-benchmark/data/train', image_name)

        chars = []
        for obj in root.findall('object'):
            name = obj.find('name').text
            xmin = int(obj.find('bndbox').find('xmin').text)
            chars.append((xmin, name))

        chars.sort(key=lambda x: x[0])
        label = ''.join([char[1] for char in chars])
        lines.append(f'{image_path}\t{label}')

with open(output_file, 'w', encoding='utf-8') as f:
    f.write('\n'.join(lines))

print(f'Done! {len(lines)} items written to {output_file}')


Done! 19381 items written to /content/deep-text-recognition-benchmark/data/gt_train.txt


سلول 4 – آماده‌سازی فایل‌های GT برای validation

In [4]:
valid_dir = '/content/Dataset/validation'
output_file = '/content/deep-text-recognition-benchmark/data/gt_validation.txt'
lines = []

for filename in os.listdir(valid_dir):
    if filename.endswith('.xml'):
        xml_path = os.path.join(valid_dir, filename)
        tree = ET.parse(xml_path)
        root = tree.getroot()

        image_name = root.find('filename').text
        image_path = os.path.join('/content/deep-text-recognition-benchmark/data/validation', image_name)


        chars = []
        for obj in root.findall('object'):
            name = obj.find('name').text
            xmin = int(obj.find('bndbox').find('xmin').text)
            chars.append((xmin, name))

        chars.sort(key=lambda x: x[0])
        label = ''.join([char[1] for char in chars])
        lines.append(f'{image_path}\t{label}')

with open(output_file, 'w', encoding='utf-8') as f:
    f.write('\n'.join(lines))

print(f'Done! {len(lines)} items written to {output_file}')


Done! 2805 items written to /content/deep-text-recognition-benchmark/data/gt_validation.txt


سلول 5 – جابه‌جایی تصاویر به مسیر data/train و data/validation

In [5]:
import shutil

# Move train images
src = '/content/Dataset/train'
dst = '/content/deep-text-recognition-benchmark/data/train'
os.makedirs(dst, exist_ok=True)

for file in os.listdir(src):
    if file.endswith('.jpg'):
        shutil.move(os.path.join(src, file), os.path.join(dst, file))

# Move validation images
src = '/content/Dataset/validation'
dst = '/content/deep-text-recognition-benchmark/data/validation'
os.makedirs(dst, exist_ok=True)

for file in os.listdir(src):
    if file.endswith('.jpg'):
        shutil.move(os.path.join(src, file), os.path.join(dst, file))

print("Train and validation images moved.")


Train and validation images moved.


🟢 سلول 6 – ساخت LMDB دیتاست

In [7]:
%cd /content/deep-text-recognition-benchmark

!python3 create_lmdb_dataset.py --inputPath data/train --gtFile data/gt_train.txt --outputPath dataset/train
!python3 create_lmdb_dataset.py --inputPath data/validation --gtFile data/gt_validation.txt --outputPath dataset/validation

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
/content/deep-text-recognition-benchmark/data/train/130628.jpg does not exist
/content/deep-text-recognition-benchmark/data/train/2274.jpg does not exist
/content/deep-text-recognition-benchmark/data/train/134629.jpg does not exist
/content/deep-text-recognition-benchmark/data/train/117267.jpg does not exist
/content/deep-text-recognition-benchmark/data/train/2160.jpg does not exist
/content/deep-text-recognition-benchmark/data/train/121891.jpg does not exist
/content/deep-text-recognition-benchmark/data/train/128.jpg does not exist
/content/deep-text-recognition-benchmark/data/train/135544.jpg does not exist
/content/deep-text-recognition-benchmark/data/train/135490.jpg does not exist
/content/deep-text-recognition-benchmark/data/train/4632.jpg does not exist
/content/deep-text-recognition-benchmark/data/train/642.jpg does not exist
/content/deep-text-recognition-benchmark/data/train/135128.jpg does not exist
/content/de

In [9]:
!python3 train.py \
--train_data dataset/train --valid_data dataset/validation \
--select_data / --batch_ratio 1 --batch_max_length 8 --valInterval 100 \
--Transformation TPS --FeatureExtraction ResNet --SequenceModeling BiLSTM --Prediction Attn


Filtering the images containing characters which are not in opt.character
Filtering the images whose label is longer than opt.batch_max_length
--------------------------------------------------------------------------------
dataset_root: dataset/train
opt.select_data: ['/']
opt.batch_ratio: ['1']
--------------------------------------------------------------------------------
dataset_root:    dataset/train	 dataset: /
sub-directory:	/.	 num samples: 0
num total samples of /: 0 x 1.0 (total_data_usage_ratio) = 0
num samples of / per batch: 192 x 1.0 (batch_ratio) = 192
Traceback (most recent call last):
  File "/content/deep-text-recognition-benchmark/train.py", line 317, in <module>
    train(opt)
  File "/content/deep-text-recognition-benchmark/train.py", line 31, in train
    train_dataset = Batch_Balanced_Dataset(opt)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/content/deep-text-recognition-benchmark/dataset.py", line 71, in __init__
    _data_loader = torch.utils.data.

/content/
│
├── Dataset/
│   ├── train/
│   │   ├── *.jpg
│   │   └── *.xml
│   └── validation/
│       ├── *.jpg
│       └── *.xml
│
└── deep-text-recognition-benchmark/
    ├── data/
    │   ├── gt_train.txt
    │   ├── gt_validation.txt
    │   ├── train/         ← عکس‌های train
    │   └── validation/    ← عکس‌های validation
    └── dataset/
        ├── train/         ← LMDB ساخته شده
        └── validation/    ← LMDB ساخته شده


In [None]:
def _accumulate(iterable):
    'Return running totals'
    total = 0
    for value in iterable:
        total += value
        yield total