In [1]:
!pip install pytorch-lightning



In [2]:
!pip install datasets



In [3]:
!pip install tokenizers



In [4]:
!pip install torchmetrics



In [5]:
!git clone https://github.com/Sushmitha-Katti/ERA.git

Cloning into 'ERA'...
remote: Enumerating objects: 399, done.[K
remote: Counting objects: 100% (232/232), done.[K
remote: Compressing objects: 100% (187/187), done.[K
remote: Total 399 (delta 113), reused 99 (delta 33), pack-reused 167[K
Receiving objects: 100% (399/399), 12.75 MiB | 11.15 MiB/s, done.
Resolving deltas: 100% (191/191), done.


In [6]:
cd /kaggle/working/ERA/Session_14_Dawn_Of_Transformers/modular

/kaggle/working/ERA/Session_14_Dawn_Of_Transformers/modular


In [7]:
import os
from pathlib import Path

import warnings
from dataset import BilinualDataModule
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger

from train import TextTranslator
from utils import get_model
from config import get_config

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Start from the project's default configuration and override the settings
# specific to this run.
cfg = get_config()
cfg["batch_size"] = 16
cfg["preload"] = None
cfg["num_epochs"] = 10

# Build the data module and run its setup for each stage name, keeping the
# original call order ("val", "train", "test").
dm = BilinualDataModule(cfg)
for stage in ("val", "train", "test"):
    dm.setup(stage)

# Dataloaders are requested in the order train -> test -> val.
train_loader = dm.train_dataloader()
test_loader = dm.test_dataloader()
val_loader = dm.val_dataloader()
tokenizer_src, tokenizer_tgt = dm.get_tokenizers()

# NOTE(review): presumably set to keep the Hugging Face tokenizers library from
# using its own parallelism alongside DataLoader workers — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Make sure the folder named by cfg["model_folder"] exists.
Path(cfg["model_folder"]).mkdir(parents=True, exist_ok=True)

# The model is sized from the source and target tokenizer vocabularies.
src_vocab_size = tokenizer_src.get_vocab_size()
tgt_vocab_size = tokenizer_tgt.get_vocab_size()
model = get_model(cfg, src_vocab_size, tgt_vocab_size)

# NOTE(review): the trailing positional 1000 is defined by train.TextTranslator;
# its meaning is not visible from this notebook — confirm against that class.
module = TextTranslator(model, tokenizer_src, tokenizer_tgt, cfg, val_loader, 1000)

# TensorBoard event files go under logs/S14_10_epochs/.
tb_logger = TensorBoardLogger("logs", name="S14_10_epochs")

Downloading builder script:   0%|          | 0.00/2.40k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/7.98k [00:00<?, ?B/s]

Downloading and preparing dataset opus_books/en-it (download: 3.14 MiB, generated: 8.58 MiB, post-processed: Unknown size, total: 11.72 MiB) to /root/.cache/huggingface/datasets/opus_books/en-it/1.0.0/e8f950a4f32dc39b7f9088908216cd2d7e21ac35f893d04d39eb594746af2daf...


Downloading data:   0%|          | 0.00/3.30M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/32332 [00:00<?, ? examples/s]

Dataset opus_books downloaded and prepared to /root/.cache/huggingface/datasets/opus_books/en-it/1.0.0/e8f950a4f32dc39b7f9088908216cd2d7e21ac35f893d04d39eb594746af2daf. Subsequent calls will reuse this data.
Max length of soruce sentence:: 309
Max length of target sentence:: 274
Max length of soruce sentence:: 309
Max length of target sentence:: 274
Max length of soruce sentence:: 309
Max length of target sentence:: 274


In [8]:

# Lightning Trainer for a single-GPU run that logs through `tb_logger`.
# The epoch budget is read from cfg["num_epochs"] (set in the setup cell)
# instead of repeating the literal, so the two values cannot drift apart.
trainer = Trainer(
    logger=tb_logger,
    enable_progress_bar=True,
    devices=1,
    accelerator="gpu",
    max_epochs=cfg["num_epochs"],
)

In [9]:
# Train `module`, feeding it train_loader for training and val_loader for
# validation.
trainer.fit(
    module,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

Training: 0it [00:00, ?it/s]

Loss for epoch 0 is 6.401790644324304
Saving Model
________________________________________________________________________________
     SOURCE:Therefore it is necessary for him to have a mind ready to turn itself accordingly as the winds and variations of fortune force it, yet, as I have said above, not to diverge from the good if he can avoid doing so, but, if compelled, then to know how to set about it.
     TARGET:E però bisogna che elli abbi uno animo disposto a volgersi secondo ch'e' venti e le variazioni della fortuna li comandono, e, come di sopra dissi, non partirsi dal bene, potendo, ma sapere intrare nel male, necessitato.
  PREDICTED:Non mi , ma non mi , ma non mi , ma non mi , ma non mi , ma non mi , ma non mi , ma non mi , e non mi .
________________________________________________________________________________
     SOURCE:He bowed, still not taking his eyes from the group of the dog and child.
     TARGET:Egli chinò la testa, senza togliere lo sguardo dalla bambina e d

In [12]:
import os
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing import event_accumulator

# Read the scalars from the directory this run's logger actually wrote to.
# The logger creates a new version_N folder for each run, so a hard-coded
# ".../version_0" path would silently plot stale curves after a re-run.
export_dir = os.path.abspath(tb_logger.log_dir)

# Create an EventAccumulator to read the exported data
event_acc = event_accumulator.EventAccumulator(export_dir)

# Load the data
event_acc.Reload()

# Extract scalar data and write one PNG per scalar tag
tags = event_acc.Tags()['scalars']
for tag in tags:
    data = event_acc.Scalars(tag)
    x = [point.step for point in data]
    y = [point.value for point in data]

    # Use an explicit figure/axes pair so each plot is self-contained and can
    # be closed deterministically at the end of the iteration.
    fig, ax = plt.subplots()
    ax.plot(x, y, label=tag)
    ax.set_xlabel('Step')
    ax.set_ylabel('Value')
    ax.set_title(tag)
    ax.legend()
    ax.grid()

    # Tags may contain "/" (e.g. "loss/train"); used verbatim in the filename
    # that would point into a sub-directory that does not exist.
    safe_tag = tag.replace('/', '_')

    # Save the plot as an image
    image_path = os.path.join(export_dir, f'{safe_tag}.png')
    print(image_path)
    fig.savefig(image_path)
    plt.close(fig)

print("Scalar data converted to images.")

/kaggle/working/ERA/Session_14_Dawn_Of_Transformers/modular/logs/S14_10_epochs/version_0/train_loss.png
/kaggle/working/ERA/Session_14_Dawn_Of_Transformers/modular/logs/S14_10_epochs/version_0/epoch.png
/kaggle/working/ERA/Session_14_Dawn_Of_Transformers/modular/logs/S14_10_epochs/version_0/validation cer.png
/kaggle/working/ERA/Session_14_Dawn_Of_Transformers/modular/logs/S14_10_epochs/version_0/validation wer.png
/kaggle/working/ERA/Session_14_Dawn_Of_Transformers/modular/logs/S14_10_epochs/version_0/validation BLEU.png
Scalar data converted to images.
