### Import modules

In [1]:
import lightning as L
from lightning.pytorch.callbacks import ModelSummary, ModelCheckpoint

In [2]:
import warnings

# Silence every warning category for the rest of the kernel session. This
# keeps the training log compact, but it also hides deprecation and runtime
# warnings raised by any library imported below.
warnings.simplefilter("ignore")


In [3]:
from model import TransformerModel
from dataset import TranslationDataModule
from config import get_config

# Load the experiment configuration; later cells read "seq_len", "d_model"
# and "num_epochs" from it.
conf = get_config()

# Seed the Python, NumPy and PyTorch RNGs (and dataloader workers) before any
# data or model object is created, so that weight initialisation and any
# randomised data handling are repeatable across "Restart & Run All".
SEED = 42
L.seed_everything(SEED, workers=True)


### Initialize the DataModule

In [4]:
# Build the datamodule from the loaded configuration.
datamodule = TranslationDataModule(conf)
# Run setup eagerly: the model cell below reads `datamodule.tokenizer_src` /
# `datamodule.tokenizer_tgt` (presumably created during setup - confirm in
# dataset.py). Note that `trainer.fit` runs setup again later, which is why
# the "Max length ..." lines appear twice in the notebook output.
datamodule.setup()

Max length of source sentence: 309
Max length of target sentence: 274


### Initialize Model & Trainer

In [5]:
# The tokenizers held by the datamodule define the vocabulary sizes and are
# also handed to the model itself.
src_tokenizer = datamodule.tokenizer_src
tgt_tokenizer = datamodule.tokenizer_tgt

# Source and target sides share the same configured sequence length.
max_seq_len = conf["seq_len"]

model = TransformerModel(
    # source side
    src_vocab_size=src_tokenizer.get_vocab_size(),
    src_seq_len=max_seq_len,
    tokenizer_src=src_tokenizer,
    # target side
    tgt_vocab_size=tgt_tokenizer.get_vocab_size(),
    tgt_seq_len=max_seq_len,
    tokenizer_tgt=tgt_tokenizer,
    # shared width
    d_model=conf["d_model"],
)

In [6]:
# Module/parameter summary table, expanded three levels deep.
summary_callback = ModelSummary(max_depth=3)

# Checkpointing driven by the logged "train_loss" metric (lower is better).
# Files are written to ./model_checkpoints as "tr_{epoch}", and
# `save_last=True` additionally keeps a copy of the most recent checkpoint.
checkpoint_callback = ModelCheckpoint(
    monitor="train_loss",
    mode="min",
    save_last=True,
    dirpath="model_checkpoints",
    filename="tr_{epoch}",
)

callbacks = [summary_callback, checkpoint_callback]


In [7]:
trainer = L.Trainer(
    # --- schedule ---
    max_epochs=conf["num_epochs"],
    check_val_every_n_epoch=10,  # validate only on every 10th epoch
    callbacks=callbacks,
    # --- hardware ---
    accelerator="gpu",
    num_nodes=1,
    devices=[1],  # a list selects GPUs by index: this pins training to GPU 1
)

Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### Begin Training

In [8]:
# Run the training loop on the datamodule's data. Training stops once the
# `max_epochs` limit configured on the trainer (conf["num_epochs"]) is reached.
trainer.fit(model=model, datamodule=datamodule)


You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Missing logger folder: /home/ravi.naik/learning/era/s15/s15lit/lightning_logs


Max length of source sentence: 309
Max length of target sentence: 274


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

   | Name                                    | Type               | Params
--------------------------------------------------------------------------------
0  | transformer                             | Transformer        | 75.1 M
1  | transformer.encoder                     | Encoder            | 18.9 M
2  | transformer.encoder.layers              | ModuleList         | 18.9 M
3  | transformer.encoder.norm                | LayerNormalization | 2     
4  | transformer.decoder                     | Decoder            | 25.2 M
5  | transformer.decoder.layers              | ModuleList         | 25.2 M
6  | transformer.decoder.norm                | LayerNormalization | 2     
7  | transformer.src_embed                   | InputEmbeddings    | 8.0 M 
8  | transformer.src_embed.embedding         | Embedding          | 8.0 M 
9  | transformer.tgt_embed                   | InputEmbeddings    | 11.5 M
10 | transformer.tgt_embed.embedding         | Em

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
