### Load libraries and enviromental variables

In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from dotenv import load_dotenv
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from torch.optim import Adam
from tqdm import tqdm
import numpy as np
import neptune
from neptune.utils import stringify_unsupported
from datasets import load_metric

load_dotenv()

True

### Load model and setup device

In [11]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForSeq2SeqLM.from_pretrained("model/model")
tokenizer = AutoTokenizer.from_pretrained("model/tokenizer/")
print(device)
model.to(device);

cuda


### Load metrics

In [3]:
bleu_metric = load_metric("bleu", trust_remote_code=True)
meteor_metric = load_metric("meteor", trust_remote_code=True)
rouge_metric = load_metric("rouge", trust_remote_code=True)

  bleu_metric = load_metric("bleu", trust_remote_code=True)
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\urbii\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\urbii\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\urbii\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


### Load and prepare data

In [4]:
data = pd.read_csv("data/All_data.csv")
train_indices = data.sample(frac=0.85).index
train_data = data.loc[train_indices].reset_index(drop=True)
valid_data = data.drop(train_indices).reset_index(drop=True)

print(train_data.shape)
print(valid_data.shape)

train_data.head()

(1029, 2)
(182, 2)


Unnamed: 0,pl,mig
0,Ja nie znam języka migowego.,Język migowy ja nie znać
1,Ja mam internet.,Ja internet mam
2,Gdzie w rzeszowie jest mops?,Mops rzeszów ulica jaka
3,Nazywam się nowak.,Ja nazwisko n o w a k
4,Dzieci nie mogą pić alkoholu.,Alkohol dzieci nie wolno pić


In [5]:
class TranslationDataset(Dataset):
    def __init__(self, input_texts, target_texts, tokenizer):
        self.input_texts = input_texts
        self.target_texts = target_texts
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.input_texts)

    def __getitem__(self, idx):
        inputs = self.tokenizer(self.input_texts[idx], return_tensors="pt", padding=True, truncation=True)
        targets = self.tokenizer(self.target_texts[idx], return_tensors="pt", padding=True, truncation=True)
        return {**inputs, "labels": targets["input_ids"]}

In [6]:
def collate_fn(batch):
    input_ids = [item['input_ids'].squeeze() for item in batch]
    attention_mask = [item['attention_mask'].squeeze() for item in batch]
    labels = [item['labels'].squeeze() for item in batch]

    input_ids = pad_sequence(input_ids, batch_first=True, padding_value=tokenizer.pad_token_id)
    attention_mask = pad_sequence(attention_mask, batch_first=True, padding_value=0)
    labels = pad_sequence(labels, batch_first=True, padding_value=tokenizer.pad_token_id)

    return {
        'input_ids': input_ids,
        'attention_mask': attention_mask,
        'labels': labels
    }

train_dataset = TranslationDataset(train_data.pl, train_data.mig, tokenizer)
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)

valid_dataset = TranslationDataset(train_data.pl, train_data.mig, tokenizer)
valid_dataloader = DataLoader(valid_dataset, batch_size=8, shuffle=False, collate_fn=collate_fn)
next(iter(train_dataloader))

{'input_ids': tensor([[ 2849,   253,  1842,  1289,  2693,     2,     0, 63429, 63429, 63429,
          63429, 63429, 63429, 63429, 63429, 63429],
         [  883,  2320,  2176,     7,     0, 63429, 63429, 63429, 63429, 63429,
          63429, 63429, 63429, 63429, 63429, 63429],
         [   74, 14622,    49,   148, 14369,   478,     2,     0, 63429, 63429,
          63429, 63429, 63429, 63429, 63429, 63429],
         [  333,   421,  1110,   215,  1123,    77, 18351,    24,    21, 11430,
             10,  4371,   582,  1013,     2,     0],
         [  700,    22,  5024,  2404,   432,    31,  1524,     2,     0, 63429,
          63429, 63429, 63429, 63429, 63429, 63429],
         [  105, 10562,    43,  3750,  2601,  6695,     7,     0, 63429, 63429,
          63429, 63429, 63429, 63429, 63429, 63429],
         [  322,    54,  3291,    25,  6925,    10,   693,     2,     0, 63429,
          63429, 63429, 63429, 63429, 63429, 63429],
         [  322,    26,   413,  2196, 15039,  4121,     

### Create evaluation function

In [7]:
def evaluate_model_on_metrics(model, dataloader, tokenizer, device):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in dataloader:
            batch = {key: value.to(device) for key, value in batch.items()}
            outputs = model.generate(input_ids=batch["input_ids"], attention_mask=batch["attention_mask"])
            predictions = [tokenizer.decode(g, skip_special_tokens=True) for g in outputs]
            references = [tokenizer.decode(g, skip_special_tokens=True) for g in batch["labels"]]

            all_preds.extend(predictions)
            all_labels.extend(references)

    # Tokenize predictions and references
    all_preds_tokenized = [pred.split() for pred in all_preds]
    all_labels_tokenized = [[label.split()] for label in all_labels]
    

    # Compute metrics
    bleu_score = np.round(bleu_metric.compute(predictions=all_preds_tokenized, references=all_labels_tokenized)['bleu'], 3)
    meteor_score = np.round(meteor_metric.compute(predictions=all_preds, references=all_labels)['meteor'], 3)
    rouge_score = np.round(rouge_metric.compute(predictions=all_preds, references=all_labels)["rougeL"].mid.fmeasure, 3)

    return bleu_score, meteor_score, rouge_score

### Model training

In [12]:
run = neptune.init_run(tags="second run")
lr = 5e-5
num_epochs = 30
optimizer = Adam(model.parameters(), lr=lr)

run["hyperparameters/learning_rate"] = optimizer.state_dict()['param_groups'][0]['lr']
run["hyperparameters/optimizer"] = "Adam"
run["hyperparameters/betas"] = stringify_unsupported(optimizer.state_dict()['param_groups'][0]['betas'])
run["hyperparameters/eps"] = optimizer.state_dict()['param_groups'][0]['eps']
run["datasets/train"].track_files("data/All_data.csv")
run["hyperparameters/num_epochs"] = num_epochs

model.train()
for epoch in range(num_epochs):
    loss_all = 0
    for batch in tqdm(train_dataloader):
        batch = {key: value.to(device) for key, value in batch.items()}

        outputs = model(**batch)
        loss = outputs.loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_all += loss.item()
    run["train/loss"].append(np.round(loss_all / len(train_dataloader), 4))
    print(f"Epoch: {epoch + 1}, loss: {np.round(loss_all / len(train_dataloader), 4)}")
    
run["score/final_loss"] = np.round(loss_all / len(train_dataloader), 4)

[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/kacperurban/pl-mig-translation/e/PLMIG-35


100%|██████████| 129/129 [00:09<00:00, 13.24it/s]


Epoch: 1, loss: 2.9807


100%|██████████| 129/129 [00:10<00:00, 12.77it/s]


Epoch: 2, loss: 1.3186


100%|██████████| 129/129 [00:10<00:00, 12.86it/s]


Epoch: 3, loss: 0.8383


100%|██████████| 129/129 [00:10<00:00, 12.46it/s]


Epoch: 4, loss: 0.5791


100%|██████████| 129/129 [00:09<00:00, 13.17it/s]


Epoch: 5, loss: 0.42


100%|██████████| 129/129 [00:09<00:00, 13.11it/s]


Epoch: 6, loss: 0.3249


100%|██████████| 129/129 [00:10<00:00, 12.75it/s]


Epoch: 7, loss: 0.2706


100%|██████████| 129/129 [00:09<00:00, 13.08it/s]


Epoch: 8, loss: 0.2194


100%|██████████| 129/129 [00:09<00:00, 13.15it/s]


Epoch: 9, loss: 0.1863


100%|██████████| 129/129 [00:09<00:00, 12.91it/s]


Epoch: 10, loss: 0.166


100%|██████████| 129/129 [00:10<00:00, 12.80it/s]


Epoch: 11, loss: 0.1568


100%|██████████| 129/129 [00:09<00:00, 12.90it/s]


Epoch: 12, loss: 0.1519


100%|██████████| 129/129 [00:10<00:00, 12.85it/s]


Epoch: 13, loss: 0.1544


100%|██████████| 129/129 [00:09<00:00, 12.90it/s]


Epoch: 14, loss: 0.1412


100%|██████████| 129/129 [00:09<00:00, 12.98it/s]


Epoch: 15, loss: 0.1359


100%|██████████| 129/129 [00:10<00:00, 12.54it/s]


Epoch: 16, loss: 0.1362


100%|██████████| 129/129 [00:10<00:00, 12.58it/s]


Epoch: 17, loss: 0.1289


100%|██████████| 129/129 [00:09<00:00, 13.14it/s]


Epoch: 18, loss: 0.12


100%|██████████| 129/129 [00:09<00:00, 13.50it/s]


Epoch: 19, loss: 0.1301


100%|██████████| 129/129 [00:09<00:00, 13.09it/s]


Epoch: 20, loss: 0.1355


100%|██████████| 129/129 [00:09<00:00, 13.43it/s]


Epoch: 21, loss: 0.1179


100%|██████████| 129/129 [00:09<00:00, 13.09it/s]


Epoch: 22, loss: 0.1159


100%|██████████| 129/129 [00:09<00:00, 13.11it/s]


Epoch: 23, loss: 0.1122


100%|██████████| 129/129 [00:09<00:00, 13.14it/s]


Epoch: 24, loss: 0.1048


100%|██████████| 129/129 [00:09<00:00, 13.30it/s]


Epoch: 25, loss: 0.1054


100%|██████████| 129/129 [00:09<00:00, 13.65it/s]


Epoch: 26, loss: 0.1094


100%|██████████| 129/129 [00:09<00:00, 13.26it/s]


Epoch: 27, loss: 0.0977


100%|██████████| 129/129 [00:09<00:00, 13.15it/s]


Epoch: 28, loss: 0.0997


100%|██████████| 129/129 [00:09<00:00, 13.02it/s]


Epoch: 29, loss: 0.1077


100%|██████████| 129/129 [00:10<00:00, 12.51it/s]

Epoch: 30, loss: 0.1082





In [13]:
bleu_score, meteor_score, rouge_score = evaluate_model_on_metrics(model, valid_dataloader, tokenizer, device)
run["metrics/BLEU"] = bleu_score
run["metrics/METEOR"] = meteor_score
run["metrics/ROUQE"] = rouge_score
run.stop()

[neptune] [info   ] Shutting down background jobs, please wait a moment...
[neptune] [info   ] Done!
[neptune] [info   ] Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.
[neptune] [info   ] All 3 operations synced, thanks for waiting!
[neptune] [info   ] Explore the metadata in the Neptune app: https://app.neptune.ai/kacperurban/pl-mig-translation/e/PLMIG-35/metadata


In [10]:
print(f"BLEU: {bleu_score} METEOR: {meteor_score}, ROUGE: {rouge_score}")

BLEU: 0.722 METEOR: 0.796, ROUGE: 0.854
