# Setup and Utils

In [1]:
!git clone https://github.com/Hari31416/transformer_from_scratch.git
!cp -r ./transformer_from_scratch/* ./
!pip install -q evaluate=='0.4.1'

In [2]:
import torch
import torch.nn as nn
import evaluate

from tqdm.auto import tqdm

T = torch.Tensor
M = nn.Module


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from src.train_utils import *
from src.transformer import *

  _torch_pytree._register_pytree_node(
2024-09-21 16:12:31.598557: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


For logging to wandb. Update the API key.

In [3]:
LOG_TO_WANDB = True
if LOG_TO_WANDB:
    from kaggle_secrets import UserSecretsClient
    import wandb

    user_secrets = UserSecretsClient()
    WANDB_API_KEY = user_secrets.get_secret("WANDB_API_KEY")

    text = f"""machine api.wandb.ai
        login user
        password {WANDB_API_KEY}
        """
    # wandb saves credentials at /root/.netrc
    with open("/root/.netrc", "w") as f:
        f.write(text)

    wandb.init(project="Transformer_From_Scratch", name="Run 8")
else:
    wandb = None

# Configs

Create the dataset config.

In [4]:
dataset_config = TranslationDatasetConfig(
    **{
        "dataset_path": "data/eng_french.csv",
        "source_tokenizer_path": "data/tokenizer_eng.json",
        "target_tokenizer_path": "data/tokenizer_fr.json",
        "source_column": "eng",
        "target_column": "fr",
        "max_len": 32,
        "device": device,
    }
)
dataset: TranslationDataset = dataset_config.load_object(TranslationDataset)
dataloader = dataset.get_dataloader(32, shuffle=True)

2024-09-21 16:12:35 - src.utils - INFO - No object provided, using ConfigFor


The model config.

In [5]:
transformer_config = TransformerConfig(
    **{
        "source_vocab_size": dataset.source_tokenizer.get_vocab_size(),
        "target_vocab_size": dataset.target_tokenizer.get_vocab_size(),
        "d_model": 512,
        "h": 8,
        "N_E": 6,
        "N_D": 6,
        "d_ff": 2048,
        "dropout": 0.1,
        "activation": nn.ReLU,
    }
)
model: Transformer = transformer_config.load_object(Transformer)

  model.load_state_dict(torch.load(saved_model_path, map_location=torch.device("cpu")))


<All keys matched successfully>

The training config.

In [6]:
english_tokenizer = dataset.source_tokenizer
french_tokenizer = dataset.target_tokenizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [7]:
trainer_config = TransformerTrainerConfig(
    **{
        "model": model,
        "optimizer": optimizer,
        "criterion": bce_crit,
        "source_tokenizer": english_tokenizer,
        "target_tokenizer": french_tokenizer,
        "max_len": dataset_config.max_len,
        "device": device,
        "wandb": wandb,
        "wandb_log_freq": 500,
        "scheduler": None,
    }
)
trainer: TransformerTrainerForTranslation = trainer_config.load_object(
    TransformerTrainerForTranslation
)



# Training

Once all the configs are created, training the model is as simple as running the following command.

In [8]:
losses, train_states = trainer.train(
    data_loader=dataloader,
    num_epochs=5,
    log_freq=500,
    eval_loader=None,
)

torch.save(model.state_dict(), "trained_transformer_for_translation.pth")

  0%|          | 0/5489 [00:00<?, ?it/s]

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

Batch Step: 1, Loss: 0.000056, Tokens / Sec: 106.148605, Learning Rate: 0.0001
Batch Step: 501, Loss: 0.000014, Tokens / Sec: 3481.471436, Learning Rate: 0.0001
Batch Step: 1001, Loss: 0.000011, Tokens / Sec: 3477.150391, Learning Rate: 0.0001
Batch Step: 1501, Loss: 0.000010, Tokens / Sec: 3481.194092, Learning Rate: 0.0001
Batch Step: 2001, Loss: 0.000010, Tokens / Sec: 3503.724121, Learning Rate: 0.0001
Batch Step: 2501, Loss: 0.000007, Tokens / Sec: 3501.480225, Learning Rate: 0.0001
Batch Step: 3001, Loss: 0.000008, Tokens / Sec: 3487.280762, Learning Rate: 0.0001
Batch Step: 3501, Loss: 0.000007, Tokens / Sec: 3459.990967, Learning Rate: 0.0001
Batch Step: 4001, Loss: 0.000008, Tokens / Sec: 3480.816162, Learning Rate: 0.0001
Batch Step: 4501, Loss: 0.000006, Tokens / Sec: 3458.361816, Learning Rate: 0.0001
Batch Step: 5001, Loss: 0.000005, Tokens / Sec: 3480.673340, Learning Rate: 0.0001
Epoch: 0, Loss: 0.000009


  0%|          | 0/5489 [00:00<?, ?it/s]

Batch Step: 1, Loss: 0.000005, Tokens / Sec: 3857.224609, Learning Rate: 0.0001
Batch Step: 501, Loss: 0.000005, Tokens / Sec: 3501.197021, Learning Rate: 0.0001
Batch Step: 1001, Loss: 0.000005, Tokens / Sec: 3465.416504, Learning Rate: 0.0001
Batch Step: 1501, Loss: 0.000005, Tokens / Sec: 3495.798096, Learning Rate: 0.0001
Batch Step: 2001, Loss: 0.000004, Tokens / Sec: 3481.960205, Learning Rate: 0.0001
Batch Step: 2501, Loss: 0.000005, Tokens / Sec: 3469.224121, Learning Rate: 0.0001
Batch Step: 3001, Loss: 0.000004, Tokens / Sec: 3465.310303, Learning Rate: 0.0001
Batch Step: 3501, Loss: 0.000003, Tokens / Sec: 3508.902100, Learning Rate: 0.0001
Batch Step: 4001, Loss: 0.000004, Tokens / Sec: 3489.075439, Learning Rate: 0.0001
Batch Step: 4501, Loss: 0.000003, Tokens / Sec: 3492.900635, Learning Rate: 0.0001
Batch Step: 5001, Loss: 0.000003, Tokens / Sec: 3474.886475, Learning Rate: 0.0001
Epoch: 1, Loss: 0.000004


  0%|          | 0/5489 [00:00<?, ?it/s]

Batch Step: 1, Loss: 0.000003, Tokens / Sec: 3726.853271, Learning Rate: 0.0001
Batch Step: 501, Loss: 0.000003, Tokens / Sec: 3488.413818, Learning Rate: 0.0001
Batch Step: 1001, Loss: 0.000003, Tokens / Sec: 3494.720459, Learning Rate: 0.0001
Batch Step: 1501, Loss: 0.000003, Tokens / Sec: 3489.966309, Learning Rate: 0.0001
Batch Step: 2001, Loss: 0.000003, Tokens / Sec: 3461.248535, Learning Rate: 0.0001
Batch Step: 2501, Loss: 0.000004, Tokens / Sec: 3485.334717, Learning Rate: 0.0001
Batch Step: 3001, Loss: 0.000003, Tokens / Sec: 3452.766846, Learning Rate: 0.0001
Batch Step: 3501, Loss: 0.000004, Tokens / Sec: 3465.833984, Learning Rate: 0.0001
Batch Step: 4001, Loss: 0.000004, Tokens / Sec: 3484.840088, Learning Rate: 0.0001
Batch Step: 4501, Loss: 0.000004, Tokens / Sec: 3467.757568, Learning Rate: 0.0001
Batch Step: 5001, Loss: 0.000002, Tokens / Sec: 3501.396973, Learning Rate: 0.0001
Epoch: 2, Loss: 0.000003


  0%|          | 0/5489 [00:00<?, ?it/s]

Batch Step: 1, Loss: 0.000003, Tokens / Sec: 3573.742432, Learning Rate: 0.0001
Batch Step: 501, Loss: 0.000003, Tokens / Sec: 3485.248779, Learning Rate: 0.0001
Batch Step: 1001, Loss: 0.000003, Tokens / Sec: 3476.948975, Learning Rate: 0.0001
Batch Step: 1501, Loss: 0.000002, Tokens / Sec: 3495.762695, Learning Rate: 0.0001
Batch Step: 2001, Loss: 0.000002, Tokens / Sec: 3473.128174, Learning Rate: 0.0001
Batch Step: 2501, Loss: 0.000002, Tokens / Sec: 3486.371582, Learning Rate: 0.0001
Batch Step: 3001, Loss: 0.000003, Tokens / Sec: 3418.929443, Learning Rate: 0.0001
Batch Step: 3501, Loss: 0.000003, Tokens / Sec: 3461.451172, Learning Rate: 0.0001
Batch Step: 4001, Loss: 0.000002, Tokens / Sec: 3477.883545, Learning Rate: 0.0001
Batch Step: 4501, Loss: 0.000002, Tokens / Sec: 3458.683838, Learning Rate: 0.0001
Batch Step: 5001, Loss: 0.000003, Tokens / Sec: 3445.753174, Learning Rate: 0.0001
Epoch: 3, Loss: 0.000003


  0%|          | 0/5489 [00:00<?, ?it/s]

Batch Step: 1, Loss: 0.000002, Tokens / Sec: 3861.412109, Learning Rate: 0.0001
Batch Step: 501, Loss: 0.000002, Tokens / Sec: 3463.711670, Learning Rate: 0.0001
Batch Step: 1001, Loss: 0.000002, Tokens / Sec: 3494.246094, Learning Rate: 0.0001
Batch Step: 1501, Loss: 0.000003, Tokens / Sec: 3486.167236, Learning Rate: 0.0001
Batch Step: 2001, Loss: 0.000002, Tokens / Sec: 3510.376465, Learning Rate: 0.0001
Batch Step: 2501, Loss: 0.000003, Tokens / Sec: 3460.096924, Learning Rate: 0.0001
Batch Step: 3001, Loss: 0.000002, Tokens / Sec: 3473.584961, Learning Rate: 0.0001
Batch Step: 3501, Loss: 0.000002, Tokens / Sec: 3495.129639, Learning Rate: 0.0001
Batch Step: 4001, Loss: 0.000002, Tokens / Sec: 3476.411377, Learning Rate: 0.0001
Batch Step: 4501, Loss: 0.000001, Tokens / Sec: 3478.095459, Learning Rate: 0.0001
Batch Step: 5001, Loss: 0.000002, Tokens / Sec: 3494.959473, Learning Rate: 0.0001
Epoch: 4, Loss: 0.000002


In [9]:
samples = dataset.df.sample(50)
eng_txts = samples["eng"].values.tolist()
fr_txts = samples["fr"].values.tolist()

fr_predicted = []
for txt in tqdm(eng_txts):
    t, _ = trainer.translate(txt)
    fr_predicted.append(t[0])
    
bleu = evaluate.load("bleu")
score = bleu.compute(predictions=fr_txts, references=fr_predicted)
score

  0%|          | 0/50 [00:00<?, ?it/s]

{'bleu': 0.22040571362563185,
 'precisions': [0.5644699140401146,
  0.31438127090301005,
  0.20883534136546184,
  0.135678391959799],
 'brevity_penalty': 0.8276940084998547,
 'length_ratio': 0.8409638554216867,
 'translation_length': 349,
 'reference_length': 415}

In [10]:
pd.set_option("display.width", 1000)
d = pd.DataFrame(
    {
        "fr_predicted": fr_predicted,
        "fr_txts": fr_txts,
        "eng_txts": eng_txts,
    }
)
d

Unnamed: 0,fr_predicted,fr_txts,eng_txts
0,je t ' aime tout.,Je vous adore tous.,I love you all.
1,"tu le ferais toujours, si tu le veux.",Tu peux toujours le faire si tu veux.,You may still do it if you want to.
2,je suis en train de travailler le dimanche.,Je suis contre le travail du dimanche.,I am against working on Sundays.
3,mets les œufs en l ' eau sur le bout de l ' eau.,Mettez les œufs dans l'eau bouillante.,Put the eggs into the boiling water.
4,ne t ' inquiete pas. je suis medecin.,Ne t'en fais pas. Je suis docteur.,Don't worry. I'm a doctor.
5,tom fut professeur d ' occasion depuis trente ...,Tom était enseignant pendant presque trente ans.,Tom was a teacher for nearly thirty years.
6,je regrette avoir donne la chance de la rencon...,Je déplore avoir manqué l'opportunité de la re...,I regret missing the chance to meet her.
7,j ' ai achete le livre.,J'ai acheté le livre.,I bought the book.
8,je ne compris aussi rien.,"Moi non plus, je n'ai rien compris.","I, too, didn't understand anything."
9,tu peux manger autant que tu le veux.,Tu peux manger autant que tu veux.,You may eat as much as you want to.
