In [1]:
# Mount Google Drive at /content/drive (Colab-only helper module).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [14]:
# Display the kernel's current working directory.
%pwd

'/content/drive/MyDrive/transformer'

In [15]:
%cd drive/MyDrive/transformer/

[Errno 2] No such file or directory: 'drive/MyDrive/transformer/'
/content/drive/MyDrive/transformer


In [10]:
%pip install datasets
%pip install tokenizers
%pip install torchmetrics
%pip install pytorch-lightning
%pip install watermark



In [16]:
import torch
import pytorch_lightning as pl

from config import Config
from lightning_datamodule import BilingualDataModule
from lightning_model import LightningTransformer

import warnings

# Suppress every Python warning from here on (no category or module filter is given).
warnings.filterwarnings("ignore")

In [17]:
%load_ext watermark
%watermark -v -p torch,lightning

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Python implementation: CPython
Python version       : 3.10.12
IPython version      : 7.34.0

torch    : 2.0.1+cu118
lightning: not installed



In [18]:
# Gather the datamodule settings from Config, then build the datamodule
# from them in a single call.
datamodule_kwargs = dict(
    dataset_path=Config.dataset_path,
    src_lang=Config.lang_src,
    tgt_lang=Config.lang_tgt,
    seq_len=Config.seq_len,
    batch_size=Config.batch_size,
    num_workers=Config.num_workers,
    pin_memory=Config.pin_memory,
    tokenizer_file=Config.tokenizer_file,
)
data_module = BilingualDataModule(**datamodule_kwargs)

Downloading data:   0%|          | 0.00/12.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/127085 [00:00<?, ? examples/s]

Filter:   0%|          | 0/127085 [00:00<?, ? examples/s]

Filter:   0%|          | 0/126797 [00:00<?, ? examples/s]

Max length of source sentence: 150
Max length of target sentence: 159


In [19]:
# The model reuses the tokenizers held by the datamodule; the remaining
# hyper-parameters come straight from Config.
model_kwargs = dict(
    tokenizer_src=data_module.tokenizer_src,
    tokenizer_tgt=data_module.tokenizer_tgt,
    seq_len=Config.seq_len,
    d_model=Config.d_model,
    lr=Config.lr,
    weight_decay=Config.weight_decay,
    eps=Config.eps,
    label_smoothing=Config.label_smoothing,
)
model = LightningTransformer(**model_kwargs)

In [21]:
import sys
from pytorch_lightning.callbacks import (
    TQDMProgressBar,
    LearningRateMonitor,
    ModelCheckpoint,
    ModelPruning,
    EarlyStopping,
)
from pytorch_lightning.loggers import TensorBoardLogger

from pytorch_lightning.profilers import PyTorchProfiler


class MyProgressBar(TQDMProgressBar):
    """TQDM progress bar whose validation and prediction bars need a terminal.

    The bars are created by the parent class; when ``sys.stdout`` is not
    attached to a TTY they are marked as disabled before being returned.
    """

    @staticmethod
    def _disable_unless_tty(progress):
        """Disable ``progress`` when stdout is not a terminal, then return it."""
        if sys.stdout.isatty():
            return progress
        progress.disable = True
        return progress

    def init_validation_tqdm(self):
        """Return the parent's validation bar, disabled when stdout is not a TTY."""
        return self._disable_unless_tty(super().init_validation_tqdm())

    def init_predict_tqdm(self):
        """Return the parent's prediction bar, disabled when stdout is not a TTY."""
        return self._disable_unless_tty(super().init_predict_tqdm())

# Experiment logger.
logger = TensorBoardLogger("tb_logs", name="transformer_er_fr_v1")

# Callbacks, built one by one so each can be inspected or tweaked on its own.
progress_bar = MyProgressBar(refresh_rate=1)
lr_monitor = LearningRateMonitor(logging_interval="epoch")
# Checkpoints track `train_loss` (lower is better): the single best one is
# kept, plus the most recent one via save_last.
# NOTE(review): `{train_loss:4f}` is a width-4 spec, possibly meant as `.4f` — confirm.
checkpoint_cb = ModelCheckpoint(
    dirpath="ckpt_logs/logs",
    filename="model-{epoch:02d}-{train_loss:4f}",
    monitor="train_loss",
    mode="min",
    save_top_k=1,
    save_last=True,
)
early_stop_cb = EarlyStopping(
    monitor="train_loss",
    mode="min",
    stopping_threshold=1.5,
)

# Trainer: full-precision GPU run, validating every epoch on two batches only.
trainer = pl.Trainer(
    accelerator="gpu",
    devices="auto",
    precision=32,
    max_epochs=35,
    check_val_every_n_epoch=1,
    limit_val_batches=2,
    log_every_n_steps=1,
    logger=logger,
    callbacks=[progress_bar, lr_monitor, checkpoint_cb, early_stop_cb],
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [22]:
# Train the model. If the saved checkpoint is present, training resumes from
# it; otherwise the run starts from scratch instead of crashing on a missing
# file. This replaces the previous comment/uncomment toggle between the two modes.
import os

RESUME_CKPT = "/content/drive/MyDrive/transformer/ckpt_logs/logs/model-epoch=33-train_loss=3.362326.ckpt"

if os.path.isfile(RESUME_CKPT):
    resume_from = RESUME_CKPT
else:
    print(f"Checkpoint not found, training from scratch: {RESUME_CKPT}")
    resume_from = None  # ckpt_path=None makes fit() start a fresh run

trainer.fit(model, datamodule=data_module, ckpt_path=resume_from)

INFO:pytorch_lightning.utilities.rank_zero:You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/drive/MyDrive/transformer/ckpt_logs/logs/model-epoch=33-train_loss=3.362326.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loading `train_dataloader` to estimate number of stepping batches.
INFO:pytorch_lightning.callbacks.model_summary:
  | Name     | Type             | Params
----------------------------------------------
0 | model    | Transformer      | 57.1 M
1 | criteria | CrossEntropyLoss | 0     
------

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=35` reached.
