In [1]:
import sys

# Put the parent directory on the import path (presumably so the local
# `lightning_ocr` package imported in the next cell can be found -- confirm
# against the repository layout).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate "../" entry each time.
if "../" not in sys.path:
    sys.path.append("../")

In [2]:
import os
import copy
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import TensorBoardLogger
from lightning_ocr.models import ABINetVision
from lightning_ocr.datasets import RecogTextDataset, RecogTextDataModule
from sklearn.model_selection import train_test_split
import albumentations as A
import lightning as L

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


In [3]:
# ENVIRONMENT
os.environ.update(TOKENIZERS_PARALLELISM="true")

# Mini-batch size; reused below for the logging interval and the data module.
batch_size = 8

# Configuration dict handed to ABINetVision: maximum sequence length and the
# tokenizer's character dictionary (the ten digits plus ".").
config = dict(
    max_seq_len=12,
    tokenizer=dict(dict_list=[*"0123456789."]),
)

In [4]:
# MODEL
# Instantiate ABINetVision from the `config` dict built in the configuration cell.

model = ABINetVision(config)

In [5]:
# DATASETS

# Load the full annotated set once, wired to the model's training pipeline.
train_dataset = RecogTextDataset(
    pipeline=model.load_train_pipeline(),
    ann_file="ann_file.json",
    data_root="./datasets/MNIST/",
)

# Fixed-seed split of the sample list: 80% for training, 20% held out.
TRAIN, TEST = train_test_split(
    train_dataset.data_list,
    random_state=42,
    test_size=0.2,
)

# The held-out dataset starts as a deep copy of the full dataset (taken before
# the training list is narrowed), then gets its own sample list and the
# model's test-time pipeline as its transform.
test_dataset = copy.deepcopy(train_dataset)
test_dataset.data_list = TEST
test_dataset.transform = A.Compose(model.load_test_pipeline())

# Only now restrict the original dataset to the training samples.
train_dataset.data_list = TRAIN

In [6]:
# CONFIGURE TRAINER

# Log every 50 steps, unless one epoch has fewer than 50 full batches, in
# which case log every 5 steps instead.
log_every_n_steps = 5 if len(train_dataset) // batch_size < 50 else 50

# Checkpointing: weights only, checked every epoch, tracking the
# "loss/total_epoch" metric. The file name embeds the epoch number and that
# metric's value; auto_insert_metric_name is disabled so the template is used
# as written.
checkpoint_callback = ModelCheckpoint(
    monitor="loss/total_epoch",
    dirpath="./checkpoints/abinet",
    filename="model-{epoch:02d}-loss-{loss/total_epoch:.2f}",
    auto_insert_metric_name=False,
    save_weights_only=True,
    every_n_epochs=1,
)

# TensorBoard event files are written under logs/abinet/.
tb_logger = TensorBoardLogger(save_dir="logs/abinet/")

# 20 epochs with 16-bit mixed precision, using the logger and checkpoint
# callback configured above.
trainer = L.Trainer(
    max_epochs=20,
    precision="16-mixed",
    callbacks=[checkpoint_callback],
    logger=tb_logger,
    log_every_n_steps=log_every_n_steps,
)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [7]:
# DUMP MODEL CONFIG
# Write the model's configuration into the checkpoint directory, so it ends up
# in the same folder as the checkpoints saved during training.

model.dump_config(checkpoint_callback.dirpath)

In [8]:
# TRAIN
# Fit the model; the data module serves the training split for optimisation
# and the held-out split for evaluation, both with the shared batch size.
trainer.fit(
    model=model,
    datamodule=RecogTextDataModule(
        batch_size=batch_size,
        train_datasets=[train_dataset],
        eval_datasets=[test_dataset],
    ),
)

You are using a CUDA device ('NVIDIA GeForce RTX 3080 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
/home/mixaill76/.local/lib/python3.10/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/mixaill76/lightning_ocr/examples/checkpoints/abinet exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type             | Params | Mode 
------------------------------------------------------
0 | backbone | ResNetABI        | 13.0 M | train
1 | encoder  | ABIEncoder       | 9.5 M  | train
2 | decoder  | ABIVisionDecoder | 1.1 M  | train
------------------------------------------------------
23.5 M    Trainable params
0         Non-trainable params
23.5 M    Total p

Epoch 0:   0%|          | 5/1153 [00:00<02:02,  9.39it/s, v_num=2, loss/total_step=3.510, learning_rate=0.0001]



Epoch 19: 100%|██████████| 1153/1153 [01:05<00:00, 17.50it/s, v_num=2, loss/total_step=0.209, learning_rate=1e-7, loss/total_epoch=0.141]      

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 1153/1153 [01:05<00:00, 17.50it/s, v_num=2, loss/total_step=0.209, learning_rate=1e-7, loss/total_epoch=0.141]


In [9]:
# List the checkpoint directory to inspect the files written there.
!ls -lah "./checkpoints/abinet"

total 182M
drwxr-xr-x 2 mixaill76 mixaill76 4.0K Feb 11 16:46 .
drwxr-xr-x 3 mixaill76 mixaill76 4.0K Jan 11 23:20 ..
-rw-r--r-- 1 mixaill76 mixaill76  288 Feb 11 16:26 base_config.json
-rw-r--r-- 1 mixaill76 mixaill76  91M Jan 11 23:26 model-04-loss-0.17.ckpt
-rw------- 1 mixaill76 mixaill76  91M Feb 11 16:46 model-18-loss-0.14.ckpt
-rw-r--r-- 1 mixaill76 mixaill76  351 Feb 11 16:26 preprocessor_config.json
-rw-r--r-- 1 mixaill76 mixaill76   99 Feb 11 16:26 special_tokens_map.json
-rw-r--r-- 1 mixaill76 mixaill76 1.6K Feb 11 16:26 tokenizer.json
-rw-r--r-- 1 mixaill76 mixaill76 1.1K Feb 11 16:26 tokenizer_config.json
-rw-r--r-- 1 mixaill76 mixaill76  141 Feb 11 16:26 vocab.json
