In [1]:
!pip install evaluate
!pip install sacrebleu

Collecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: evaluate
Successfully installed evaluate-0.4.0
[0mCollecting sacrebleu
  Downloading sacrebleu-2.3.1-py3-none-any.whl (118 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.9/118.9 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: sacrebleu
Successfully installed sacrebleu-2.3.1
[0m

In [1]:
import math

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from transformers import AutoTokenizer, MT5ForConditionalGeneration, set_seed
# import evaluate

from tqdm import tqdm

set_seed(42)

#bleu = evaluate.load('sacrebleu')

In [3]:
class TData(Dataset):
    def __init__(self, df, tokenizer, device='cpu'):
        super(TData, self).__init__()

        self.df = df
        self.tokenizer = tokenizer
        self.device= device

    def __getitem__(self, i):
        row = self.df.iloc[i]

        cloze = f'answer: {row["answer"]} context: {row["cloze"]}'
        question = f'question: {row["question"]}'
        
        encoder_inputs = self.tokenizer(cloze, padding='max_length', max_length=256,
                                        truncation=True, return_tensors='pt')
        decoder_outputs = self.tokenizer(question, padding='max_length', max_length=256,
                                         truncation=True, return_tensors="pt")

        encoder_inputs = {k: v[0].to(self.device) for k, v in encoder_inputs.items()}
        decoder_outputs = {f'decoder_{k}': v[0].to(self.device) for k, v in decoder_outputs.items()}

        return {**encoder_inputs, **decoder_outputs, 'labels': decoder_outputs['decoder_input_ids']}

    def __len__(self):
        return len(self.df)


In [20]:
def train_step(model, train_loader, optimizer):
    model.train()

    losses, ppls, bleus = [], [], []
    pbar = tqdm(train_loader)
    for i, batch in enumerate(pbar):
        optimizer.zero_grad()
        out = model(**batch)
        loss = out.loss

        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        ppls.append(math.exp(loss.item()))
        
        references = [[o] for o in tokenizer.batch_decode(batch['decoder_input_ids'], skip_special_tokens=True)]
        predictions = tokenizer.batch_decode(out.logits.argmax(dim=-1), skip_special_tokens=True)
        results = bleu.compute(predictions=predictions, references=references)
        bleus.append(results['score'])

        pbar.set_description(f'Batch {i+1}/{len(train_loader)}: Loss: {np.mean(losses):.4f} - Perplexity: {np.mean(ppls):.4f} - Bleu: {np.mean(bleus):.4f}')

    return np.mean(losses), np.mean(ppls), np.mean(bleus)


def eval_step(model, val_loader):
    model.eval()

    losses, ppls, bleus = [], [], []
    with torch.no_grad():
        pbar = tqdm(val_loader)
        for i, batch in enumerate(pbar):
            out = model(**batch)
            loss = out.loss           
            
            losses.append(loss.item())
            ppls.append(math.exp(loss.item()))
            
            references = [[o] for o in tokenizer.batch_decode(batch['decoder_input_ids'], skip_special_tokens=True)]
            predictions = tokenizer.batch_decode(out.logits.argmax(dim=-1), skip_special_tokens=True)
            results = bleu.compute(predictions=predictions, references=references)
            bleus.append(results['score'])
            
            pbar.set_description(f'Batch {i+1}/{len(val_loader)}: Loss: {np.mean(losses):.4f} - Perplexity: {np.mean(ppls):.4f} - Bleu: {np.mean(bleus):.4f}')
        
            if i == 0:
                print('Example Reference: ', references[0])
                print('Example Prediction: ', predictions[0])

    return np.mean(losses), np.mean(ppls), np.mean(bleus)


def train(model, train_loader, optimizer, val_loader=None, device='cpu'):
    train_losses, train_ppls, train_bleus = [], [], []
    val_losses, val_ppls, val_bleus = [], [], []

    for epoch in range(EPOCHS):
        print(f'Epoch {epoch+1}/{EPOCHS}')

        train_loss, train_ppl, train_bleu = train_step(model, train_loader, optimizer)
        train_losses.append(train_loss)
        train_ppls.append(train_ppl)
        train_bleus.append(train_bleu)

        if val_loader is not None:
            val_loss, val_ppl, val_bleu = eval_step(model, val_loader)
            val_losses.append(val_loss)
            val_ppls.append(val_ppl)
            val_bleus.append(val_bleu)
        
        test_case = val_df.iloc[0]
        question = ask(model, test_case.answer, test_case.cloze, device=DEVICE)
        print(f'GENERATED -> {question}')

    return train_losses, train_ppls, train_bleus, val_losses, val_ppls, val_bleus

@torch.no_grad()
def ask(model, answer, cloze, device='cpu'):
    encoder_inputs = tokenizer(f'answer: {answer} context: {cloze}', return_tensors='pt', truncation=True).input_ids.to(device)
    decoder_inputs = tokenizer('question: ', return_tensors='pt', add_special_tokens=False).input_ids.to(device)
    
    """
    generated_ids = model.generate(encoder_inputs,
                                   decoder_input_ids=decoder_inputs,
                                   pad_token_id=tokenizer.eos_token_id,
                                   num_beams=5, do_sample=True,
                                   top_k=50, top_p=0.95, early_stopping=True,
                                   no_repeat_ngram_size=1,
                                   max_new_tokens=50, num_return_sequences=1)
    """
    generated_ids = model.generate(encoder_inputs, decoder_input_ids=decoder_inputs,
                                   num_beams=1, num_return_sequences=1, 
                                   do_sample=False, max_new_tokens=50)

    return tokenizer.batch_decode(generated_ids)

ask(model, answer=answer, cloze=cloze, device='cpu')

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


["question:'''''''''''''''''''''''''"]

In [19]:
tokenizer = AutoTokenizer.from_pretrained('google/mt5-small')
model = MT5ForConditionalGeneration.from_pretrained('google/mt5-small').to('cpu')

In [11]:
cloze = '''Panthers hattında ayrıca, sadece 9 başlangıçta 5 sack eden uç çizgi savunmacısı Kony Ealy ile birlikte 136 kez ile NFL'nin aktif kariyer sack lideri ve 5 kez profesyonel bir top atıcısı olan Jared Allen öne çıkmaktadır.'''
answer = '''136'''

ask(model, answer=answer, cloze=cloze, device='cpu')

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


['question:::::::::::::::::::::::::::::::::::::::::::::::::::']

In [21]:
from transformers import pipeline
generator = pipeline('text2text-generation', model=model, tokenizer=tokenizer)
generator(f'answer: {answer} context: {cloze}', max_length=50, num_beams=1, do_sample=False)

[{'generated_text': '<extra_id_0> lideri'}]

In [5]:
TRAIN_DIR = '/kaggle/input/my-quad/my_quad.csv'

trainval_df = pd.read_csv(TRAIN_DIR)

train_df = trainval_df.sample(frac=.85, random_state=42)
val_df = trainval_df.drop(train_df.index)

In [6]:
BATCH_SIZE = 2
EPOCHS = 20
DEVICE = 'cuda'

In [7]:
model_path = 'google/mt5-small'

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = MT5ForConditionalGeneration.from_pretrained(model_path).to(DEVICE)

optimizer = optim.Adadelta(model.parameters())

Downloading (…)okenizer_config.json:   0%|          | 0.00/82.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/553 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

  "The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option"


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/1.20G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [4]:
train_data = TData(train_df, tokenizer, device=DEVICE)
val_data = TData(val_df, tokenizer, device=DEVICE)

NameError: name 'TData' is not defined

In [9]:
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE)

In [10]:
ex = val_df.iloc[0]
print(f'answer -> {ex.answer}\ncloze -> {ex.cloze}\nquestion -> {ex.question}')

answer -> 136
cloze ->  Panthers hattında ayrıca, sadece 9 başlangıçta 5 sack eden uç çizgi savunmacısı Kony Ealy ile birlikte 136 kez ile NFL'nin aktif kariyer sack lideri ve 5 kez profesyonel bir top atıcısı olan Jared Allen öne çıkmaktadır.
question -> Jared Allen'ın kaç tane kariyer sack edişi vardır?


In [18]:
train_losses, train_ppls, train_bleus, val_losses, val_ppls, val_bleus = train(model, train_loader, optimizer, val_loader=val_loader, device=DEVICE)

Epoch 1/20


Batch 566/566: Loss: 1.3193 - Perplexity: 2293294902424658727927808.0000 - Bleu: 3.0374: 100%|██████████| 566/566 [01:54<00:00,  4.93it/s] 
Batch 5/100: Loss: 0.4483 - Perplexity: 1.5682 - Bleu: 6.2415:   3%|▎         | 3/100 [00:00<00:04, 21.41it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: ne  yıli ne ne ne ne    ne?


Batch 100/100: Loss: 0.4573 - Perplexity: 1.5908 - Bleu: 5.6788: 100%|██████████| 100/100 [00:04<00:00, 20.93it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 2/20


Batch 566/566: Loss: 0.4311 - Perplexity: 1.5515 - Bleu: 6.1357: 100%|██████████| 566/566 [01:54<00:00,  4.95it/s]
Batch 5/100: Loss: 0.3083 - Perplexity: 1.3638 - Bleu: 8.2176:   3%|▎         | 3/100 [00:00<00:04, 21.42it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: questionadı İ'ın kaç kaç kaçadı ilmiştir adıadı?


Batch 100/100: Loss: 0.3331 - Perplexity: 1.4016 - Bleu: 8.2496: 100%|██████████| 100/100 [00:05<00:00, 19.67it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 3/20


Batch 566/566: Loss: 0.2577 - Perplexity: 1.3009 - Bleu: 10.3577: 100%|██████████| 566/566 [01:54<00:00,  4.95it/s]
Batch 5/100: Loss: 0.0507 - Perplexity: 1.0521 - Bleu: 46.1296:   3%|▎         | 3/100 [00:00<00:04, 21.74it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane taneer eden edmektedir vardır?


Batch 100/100: Loss: 0.0710 - Perplexity: 1.0740 - Bleu: 41.5976: 100%|██████████| 100/100 [00:04<00:00, 21.13it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 4/20


Batch 566/566: Loss: 0.1043 - Perplexity: 1.1114 - Bleu: 25.8855: 100%|██████████| 566/566 [01:54<00:00,  4.95it/s]
Batch 5/100: Loss: 0.0065 - Perplexity: 1.0065 - Bleu: 89.4749:   3%|▎         | 3/100 [00:00<00:04, 20.87it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0154 - Perplexity: 1.0156 - Bleu: 79.8600: 100%|██████████| 100/100 [00:04<00:00, 20.97it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 5/20


Batch 566/566: Loss: 0.0534 - Perplexity: 1.0552 - Bleu: 44.6756: 100%|██████████| 566/566 [01:54<00:00,  4.95it/s]
Batch 5/100: Loss: 0.0028 - Perplexity: 1.0028 - Bleu: 97.6677:   3%|▎         | 3/100 [00:00<00:04, 21.04it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0074 - Perplexity: 1.0074 - Bleu: 90.5437: 100%|██████████| 100/100 [00:04<00:00, 20.97it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 6/20


Batch 566/566: Loss: 0.0344 - Perplexity: 1.0352 - Bleu: 57.7159: 100%|██████████| 566/566 [01:54<00:00,  4.95it/s]
Batch 5/100: Loss: 0.0012 - Perplexity: 1.0012 - Bleu: 97.6677:   3%|▎         | 3/100 [00:00<00:04, 21.51it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0034 - Perplexity: 1.0034 - Bleu: 95.2444: 100%|██████████| 100/100 [00:04<00:00, 20.75it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 7/20


Batch 566/566: Loss: 0.0256 - Perplexity: 1.0260 - Bleu: 65.8121: 100%|██████████| 566/566 [01:54<00:00,  4.96it/s]
Batch 5/100: Loss: 0.0005 - Perplexity: 1.0005 - Bleu: 100.0000:   3%|▎         | 3/100 [00:00<00:04, 21.43it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0029 - Perplexity: 1.0029 - Bleu: 95.4572: 100%|██████████| 100/100 [00:05<00:00, 19.55it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 8/20


Batch 566/566: Loss: 0.0191 - Perplexity: 1.0193 - Bleu: 71.7069: 100%|██████████| 566/566 [01:54<00:00,  4.95it/s]
Batch 5/100: Loss: 0.0003 - Perplexity: 1.0003 - Bleu: 100.0000:   3%|▎         | 3/100 [00:00<00:04, 21.64it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0023 - Perplexity: 1.0023 - Bleu: 96.6362: 100%|██████████| 100/100 [00:04<00:00, 20.86it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 9/20


Batch 566/566: Loss: 0.0159 - Perplexity: 1.0161 - Bleu: 76.2055: 100%|██████████| 566/566 [01:55<00:00,  4.92it/s]
Batch 5/100: Loss: 0.0003 - Perplexity: 1.0003 - Bleu: 100.0000:   3%|▎         | 3/100 [00:00<00:04, 21.29it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0017 - Perplexity: 1.0017 - Bleu: 96.9264: 100%|██████████| 100/100 [00:04<00:00, 20.35it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 10/20


Batch 566/566: Loss: 0.0135 - Perplexity: 1.0136 - Bleu: 79.7002: 100%|██████████| 566/566 [01:54<00:00,  4.94it/s]
Batch 5/100: Loss: 0.0002 - Perplexity: 1.0002 - Bleu: 100.0000:   3%|▎         | 3/100 [00:00<00:04, 21.49it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0018 - Perplexity: 1.0018 - Bleu: 96.8738: 100%|██████████| 100/100 [00:04<00:00, 20.94it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 11/20


Batch 566/566: Loss: 0.0111 - Perplexity: 1.0112 - Bleu: 82.0375: 100%|██████████| 566/566 [01:54<00:00,  4.95it/s]
Batch 5/100: Loss: 0.0001 - Perplexity: 1.0001 - Bleu: 100.0000:   3%|▎         | 3/100 [00:00<00:04, 21.23it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0014 - Perplexity: 1.0014 - Bleu: 97.0582: 100%|██████████| 100/100 [00:04<00:00, 20.97it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 12/20


Batch 566/566: Loss: 0.0095 - Perplexity: 1.0096 - Bleu: 84.6348: 100%|██████████| 566/566 [01:54<00:00,  4.96it/s]
Batch 4/100: Loss: 0.0000 - Perplexity: 1.0000 - Bleu: 100.0000:   4%|▍         | 4/100 [00:00<00:05, 16.09it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0010 - Perplexity: 1.0010 - Bleu: 97.9793: 100%|██████████| 100/100 [00:05<00:00, 19.64it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 13/20


Batch 566/566: Loss: 0.0087 - Perplexity: 1.0088 - Bleu: 85.4465: 100%|██████████| 566/566 [01:54<00:00,  4.96it/s]
Batch 5/100: Loss: 0.0002 - Perplexity: 1.0002 - Bleu: 100.0000:   3%|▎         | 3/100 [00:00<00:04, 21.45it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0013 - Perplexity: 1.0013 - Bleu: 97.2855: 100%|██████████| 100/100 [00:04<00:00, 20.78it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 14/20


Batch 566/566: Loss: 0.0074 - Perplexity: 1.0075 - Bleu: 87.3720: 100%|██████████| 566/566 [01:54<00:00,  4.96it/s]
Batch 5/100: Loss: 0.0006 - Perplexity: 1.0006 - Bleu: 98.0823:   3%|▎         | 3/100 [00:00<00:04, 21.33it/s] 

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0012 - Perplexity: 1.0013 - Bleu: 97.5209: 100%|██████████| 100/100 [00:04<00:00, 20.60it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 15/20


Batch 566/566: Loss: 0.0066 - Perplexity: 1.0066 - Bleu: 88.4932: 100%|██████████| 566/566 [01:54<00:00,  4.96it/s]
Batch 5/100: Loss: 0.0001 - Perplexity: 1.0001 - Bleu: 100.0000:   3%|▎         | 3/100 [00:00<00:04, 21.72it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0013 - Perplexity: 1.0013 - Bleu: 97.1753: 100%|██████████| 100/100 [00:04<00:00, 21.08it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 16/20


Batch 566/566: Loss: 0.0057 - Perplexity: 1.0057 - Bleu: 89.0434: 100%|██████████| 566/566 [01:54<00:00,  4.96it/s]
Batch 5/100: Loss: 0.0002 - Perplexity: 1.0002 - Bleu: 100.0000:   3%|▎         | 3/100 [00:00<00:04, 21.63it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0010 - Perplexity: 1.0010 - Bleu: 97.4340: 100%|██████████| 100/100 [00:04<00:00, 21.23it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 17/20


Batch 566/566: Loss: 0.0052 - Perplexity: 1.0052 - Bleu: 90.7826: 100%|██████████| 566/566 [01:54<00:00,  4.96it/s]
Batch 4/100: Loss: 0.0001 - Perplexity: 1.0001 - Bleu: 100.0000:   2%|▏         | 2/100 [00:00<00:06, 15.59it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0011 - Perplexity: 1.0011 - Bleu: 97.5330: 100%|██████████| 100/100 [00:04<00:00, 20.26it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 18/20


Batch 566/566: Loss: 0.0050 - Perplexity: 1.0050 - Bleu: 91.0255: 100%|██████████| 566/566 [01:54<00:00,  4.96it/s]
Batch 4/100: Loss: 0.0000 - Perplexity: 1.0000 - Bleu: 100.0000:   3%|▎         | 3/100 [00:00<00:04, 20.63it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0011 - Perplexity: 1.0011 - Bleu: 97.7591: 100%|██████████| 100/100 [00:04<00:00, 21.19it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 19/20


Batch 566/566: Loss: 0.0049 - Perplexity: 1.0049 - Bleu: 91.3461: 100%|██████████| 566/566 [01:54<00:00,  4.96it/s]
Batch 5/100: Loss: 0.0001 - Perplexity: 1.0001 - Bleu: 100.0000:   3%|▎         | 3/100 [00:00<00:04, 21.60it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0012 - Perplexity: 1.0012 - Bleu: 97.8853: 100%|██████████| 100/100 [00:04<00:00, 20.91it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']
Epoch 20/20


Batch 566/566: Loss: 0.0040 - Perplexity: 1.0040 - Bleu: 92.6174: 100%|██████████| 566/566 [01:54<00:00,  4.95it/s]
Batch 5/100: Loss: 0.0002 - Perplexity: 1.0002 - Bleu: 100.0000:   3%|▎         | 3/100 [00:00<00:04, 21.38it/s]

Example Reference:  ["question: Jared Allen'ın kaç tane kariyer sack edişi vardır?"]
Example Prediction:  question: Jared Allen'ın kaç tane kariyer sack edişi vardır?


Batch 100/100: Loss: 0.0008 - Perplexity: 1.0008 - Bleu: 98.5489: 100%|██████████| 100/100 [00:04<00:00, 20.93it/s]


GENERATED -> ['question:::::::::::::::::::::::::::::::::::::::::::::::::::']


In [17]:
ex = val_df.iloc[0]
ask(model, ex.answer, ex.cloze, device=DEVICE)

["question:'''''''''''''''''''''''' '"]

In [19]:
model.save_pretrained('mt5_12april')

In [None]:
!zip -r best_bert_gpt2.zip best_bert_gpt2/

In [None]:
while True:
    pass

In [None]:
!cp /content/attacker_bert_gpt2.pt /content/drive/MyDrive/adversarial-taboo/adversarial-taboo-models