In [1]:
from os.path import join as path_join
import random

from tqdm.auto import tqdm

import numpy as np

from sklearn.model_selection import train_test_split

from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms.v2 as T
import lightning as L

from data import AidaDataset
from transforms import FixedAspectResize, RandomSpots
from tokenizers import LaTeXTokenizer
from models import LaTeXOCREncoder, LaTeXOCRDecoder, LaTeXOCRModel, LitLaTeXOCRModel

In [2]:
def build_ocr_model(tokenizer: LaTeXTokenizer, device=None):
    """Assemble the LaTeX OCR model from an EfficientNetV2-M backbone and a decoder.

    Args:
        tokenizer: Tokenizer whose ``vocab_size`` sizes the decoder; it is
            also handed to the assembled ``LaTeXOCRModel``.
        device: Optional target device (e.g. ``"cuda"``). When ``None`` (the
            default) the model is returned without being moved, which is the
            previous behaviour of this function.

    Returns:
        The assembled ``LaTeXOCRModel``.
    """
    # Encoder output width and decoder width are the same value; keep them in
    # one place so they cannot drift apart.
    d_model = 128

    # efficientnet_v2_m() is called without weights, so the backbone starts
    # from random initialisation. The last entry of `.features` is dropped.
    # NOTE(review): d_backbone=512 presumably matches the channel count of the
    # truncated feature extractor — confirm against torchvision.
    encoder_backbone = torchvision.models.efficientnet_v2_m().features[:-1]
    encoder = LaTeXOCREncoder(
        encoder_backbone,
        d_backbone=512,
        d_model=d_model,
    )

    decoder = LaTeXOCRDecoder(
        tokenizer.vocab_size,
        d_model=d_model,
        n_heads=4,
        ff_dim=256,
        n_layers=3,
        dropout=0.1,
    )

    model = LaTeXOCRModel(encoder, decoder, tokenizer)
    if device is not None:
        # assumes LaTeXOCRModel is a torch.nn.Module, so `.to()` is available
        # and returns the module — TODO confirm against the `models` module.
        model = model.to(device)
    return model

In [3]:
# --- Paths ---
DATA_DIR = "data/cleaned_aida"

# --- Sizes and optimisation settings ---
INPUT_SIZE = 512
BATCH_SIZE = 8
LR = 1e-3

# --- Reproducibility: seed Python, NumPy and PyTorch RNGs with one value ---
SEED = 101
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)

In [4]:
# Training pipeline: RandomSpots runs first, then the sample is converted with
# T.ToImage() and resized. The resize target comes from INPUT_SIZE (config
# cell) instead of a repeated literal, so it is tuned in one place.
train_transform = T.Compose([
    RandomSpots(spots_range=(3, 7), w_range=(5, 10), h_range=(3, 5)),
    T.ToImage(),
    FixedAspectResize(INPUT_SIZE),
])
# Evaluation pipeline: same conversion and resize, without RandomSpots.
test_transform = T.Compose([
    T.ToImage(),
    FixedAspectResize(INPUT_SIZE),
])
train_set = AidaDataset(DATA_DIR, mode="train", transform=train_transform)
test_set = AidaDataset(DATA_DIR, mode="test", transform=test_transform)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE)

  0%|          | 0/8000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

In [13]:
# Load the tokenizer from the `vocab.json` file under DATA_DIR, then assemble
# the OCR model with it. `device` is forwarded as a keyword, so the
# build_ocr_model definition in use must accept a `device` argument.
vocab_path = path_join(DATA_DIR, "vocab.json")
tokenizer = LaTeXTokenizer.load_from(vocab_path)
model = build_ocr_model(tokenizer, device="cuda")

In [15]:
# Lightning trainer constructed with all-default settings.
trainer = L.Trainer()
# NOTE(review): the recorded output of this cell ends in
# "TypeError: __init__() missing 1 required positional argument: 'ignore_indices'",
# which presumably comes from the constructor call below — confirm against
# LitLaTeXOCRModel in the `models` module and pass `ignore_indices` explicitly.
lit_model = LitLaTeXOCRModel(model, lr=LR, weight_decay=0.01)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/311505018/Projects/cv2024-term-project/.venv/lib/python3.8/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default


TypeError: __init__() missing 1 required positional argument: 'ignore_indices'