In [1]:
# imports
import warnings
import numpy as np
import pandas as pd

import torch
import transformers

from datasets import Dataset
from datasets import load_metric

from tqdm import tqdm
from transformers import AutoTokenizer
from sklearn.model_selection import train_test_split
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoModelForSeq2SeqLM
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer

warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm





### Helper functions

In [2]:
def postprocess_text(preds: list, labels: list) -> tuple:
    """Performs post processing on the prediction text and labels"""

    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]

    return preds, labels

def prep_data_for_model_fine_tuning(source_lang: list, target_lang: list) -> list:
    """Takes the input data lists and converts into translation list of dicts"""

    data_dict = dict()
    data_dict['translation'] = []

    for sr_text, tr_text in zip(source_lang, target_lang):
        temp_dict = dict()
        temp_dict['ar'] = sr_text
        temp_dict['en'] = tr_text

        data_dict['translation'].append(temp_dict)

    return data_dict

def generate_model_ready_dataset(dataset: list, source: str, target: str,
                                 model_checkpoint: str,
                                 tokenizer: AutoTokenizer):
    """Makes the data training ready for the model"""

    preped_data = []

    for row in dataset:
        inputs = ""+ row[source]
        targets = row[target]

        model_inputs = tokenizer(inputs, max_length=128,
                                 truncation=True, padding=True)

        model_inputs['translation'] = row

        # setup the tokenizer for targets
        with tokenizer.as_target_tokenizer():
            labels = tokenizer(targets, max_length=128,
                                 truncation=True, padding=True)
            model_inputs['labels'] = labels['input_ids']

        preped_data.append(model_inputs)

    return preped_data

def compute_metrics(eval_preds: tuple) -> dict:
    """computes bleu score and other performance metrics """

    metric = load_metric("sacrebleu")
    tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-ar-en")

    preds, labels = eval_preds

    if isinstance(preds, tuple):
        preds = preds[0]

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Some simple post-processing
    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {'bleu': result['score']}

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]

    result['gen_len'] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}

    return result


### Loading the dataset

In [3]:
data = pd.read_excel('cleaned_data(Arzen).xlsx')
data.head(10)

Unnamed: 0,egyption_Text,english_Text,category,sub_category
0,الأمير الصغير,The little prince,Novels,the-little-prince
1,أنطوان دي سانت إكزوبيري,Antoine De Saint-Exuper,Novels,the-little-prince
2,الفصل الأول,Chapter 1,Novels,the-little-prince
3,في مره، لما كان عندي ست سنين، شفت صوره روعه، ف...,Once when I was six years old I saw a magnific...,Novels,the-little-prince
4,وكانت عباره عن تعبان من نوع البوا بيبلع فهد.,It was a picture of a boa constrictor in the a...,Novels,the-little-prince
5,ودي نسخة من الرسمه.,Here is a copy of the drawing.,Novels,the-little-prince
6,كان بيقولو في الكتاب: تعابين البوا بيبلعو فريس...,In the book it said: Boa constrictors swallow ...,Novels,the-little-prince
7,ومش بيبقو قادرين يتحركو بعد كدا، وبينامو لمده ...,"After that they are not able to move, and they...",Novels,the-little-prince
8,بعدها، فكرت كثير في مغامرات الأدغال,"I pondered deeply, then, over the adventures o...",Novels,the-little-prince
9,وأنا كمان نجحت في إني أرسم أول رسمه بالألوان ا...,And after some work with a coloured pencil I s...,Novels,the-little-prince


### Split inputs and outputs and perform train/test split

In [4]:
# split inputs and outputs into different sets
inputs = data['egyption_Text']
outputs = data['english_Text']

# apply the train, test, validation split
x_train, x_test, y_train, y_test = train_test_split(inputs, outputs, test_size=0.2, random_state = 40)

In [5]:
print('training set: \nx: ',x_train.shape,' y: ',y_train.shape, '\ntest set: \nx: ',x_test.shape,' y: ',y_test.shape)

training set: 
x:  (19084,)  y:  (19084,) 
test set: 
x:  (4772,)  y:  (4772,)


Splitting the training set into **training** and **validation** sets

In [6]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.5, random_state=40)

In [7]:
print('training set: \nx: ',x_train.shape,' y: ',y_train.shape, '\ntest set: \nx: ',x_val.shape,' y: ',y_val.shape)

training set: 
x:  (9542,)  y:  (9542,) 
test set: 
x:  (9542,)  y:  (9542,)


### Tokenize and prep the data using the helper functions

In [8]:
# load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-ar-en")

In [9]:
training_data = prep_data_for_model_fine_tuning(x_train.values, y_train.values)

validation_data = prep_data_for_model_fine_tuning(x_val.values, y_val.values)

test_data = prep_data_for_model_fine_tuning(x_test.values, y_test.values)

In [10]:
# Tokenize the data
train_data = generate_model_ready_dataset(dataset=training_data['translation'],
                                          tokenizer=tokenizer,
                                          source='ar',
                                          target='en',
                                          model_checkpoint="Helsinki-NLP/opus-mt-ar-en")

validation_data = generate_model_ready_dataset(dataset=validation_data['translation'],
                                          tokenizer=tokenizer,
                                          source='ar',
                                          target='en',
                                          model_checkpoint="Helsinki-NLP/opus-mt-ar-en")

test_data = generate_model_ready_dataset(dataset=test_data['translation'],
                                          tokenizer=tokenizer,
                                          source='ar',
                                          target='en',
                                          model_checkpoint="Helsinki-NLP/opus-mt-ar-en")

Converting the list of dictionaries into a pandas dataframe to later on be able to convert it to a dataset object

In [11]:
train_df = pd.DataFrame.from_records(train_data)
validation_df = pd.DataFrame.from_records(validation_data)
test_df = pd.DataFrame.from_records(test_data)

In [12]:
# Convert the dataframes to dataset objects
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)
validation_dataset = Dataset.from_pandas(validation_df)

### Loading the model, defining the training arguments and the data collator

In [13]:
# load the model
model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-ar-en")

**Training Arguments:**
* learning rate: 2e-5
* batch size: 16
* number of epochs: 3

In [14]:
training_args = Seq2SeqTrainingArguments(
    output_dir = './results',  # output directory to store results
    num_train_epochs = 3,
    per_device_train_batch_size = 16, 
    per_device_eval_batch_size = 16, 
    learning_rate = 0.00002, 
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    predict_with_generate=True
)

In [15]:
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

### Training the model with the defined training arguments

In [16]:
trainer = Seq2SeqTrainer(model= model, 
                  args = training_args, 
                  train_dataset=train_dataset, 
                  eval_dataset = validation_dataset,
                  data_collator=data_collator,
                  tokenizer= tokenizer,
                  compute_metrics= compute_metrics)
trainer.train()

  1%|          | 10/1791 [00:28<1:18:46,  2.65s/it]

{'loss': 2.7926, 'grad_norm': 9.328065872192383, 'learning_rate': 1.9888330541596874e-05, 'epoch': 0.02}


  1%|          | 20/1791 [00:58<1:18:50,  2.67s/it]

{'loss': 2.7167, 'grad_norm': 8.610991477966309, 'learning_rate': 1.977666108319375e-05, 'epoch': 0.03}


  2%|▏         | 30/1791 [01:26<1:21:56,  2.79s/it]

{'loss': 2.6641, 'grad_norm': 7.505693435668945, 'learning_rate': 1.9664991624790622e-05, 'epoch': 0.05}


  2%|▏         | 40/1791 [01:47<1:05:01,  2.23s/it]

{'loss': 2.5627, 'grad_norm': 8.268084526062012, 'learning_rate': 1.9553322166387494e-05, 'epoch': 0.07}


  3%|▎         | 50/1791 [02:11<59:47,  2.06s/it]  

{'loss': 2.5722, 'grad_norm': 7.0177321434021, 'learning_rate': 1.944165270798437e-05, 'epoch': 0.08}


  3%|▎         | 60/1791 [02:35<1:04:49,  2.25s/it]

{'loss': 2.5362, 'grad_norm': 7.130494594573975, 'learning_rate': 1.9329983249581243e-05, 'epoch': 0.1}


  4%|▍         | 70/1791 [03:04<1:13:17,  2.55s/it]

{'loss': 2.6017, 'grad_norm': 6.762292861938477, 'learning_rate': 1.9218313791178115e-05, 'epoch': 0.12}


  4%|▍         | 80/1791 [03:26<58:50,  2.06s/it]  

{'loss': 2.4335, 'grad_norm': 7.88276481628418, 'learning_rate': 1.9106644332774987e-05, 'epoch': 0.13}


  5%|▌         | 90/1791 [03:47<53:18,  1.88s/it]  

{'loss': 2.4819, 'grad_norm': 7.175753116607666, 'learning_rate': 1.899497487437186e-05, 'epoch': 0.15}


  6%|▌         | 100/1791 [04:11<1:04:47,  2.30s/it]

{'loss': 2.5167, 'grad_norm': 7.504319190979004, 'learning_rate': 1.8883305415968732e-05, 'epoch': 0.17}


  6%|▌         | 110/1791 [04:34<1:14:48,  2.67s/it]

{'loss': 2.4278, 'grad_norm': 6.561126708984375, 'learning_rate': 1.8771635957565608e-05, 'epoch': 0.18}


  7%|▋         | 120/1791 [04:56<1:01:11,  2.20s/it]

{'loss': 2.5152, 'grad_norm': 6.5772705078125, 'learning_rate': 1.865996649916248e-05, 'epoch': 0.2}


  7%|▋         | 130/1791 [05:17<52:51,  1.91s/it]  

{'loss': 2.4678, 'grad_norm': 7.65681266784668, 'learning_rate': 1.8548297040759353e-05, 'epoch': 0.22}


  8%|▊         | 140/1791 [05:40<1:12:13,  2.62s/it]

{'loss': 2.4836, 'grad_norm': 7.498748302459717, 'learning_rate': 1.8436627582356228e-05, 'epoch': 0.23}


  8%|▊         | 150/1791 [06:03<55:03,  2.01s/it]  

{'loss': 2.3597, 'grad_norm': 7.310708522796631, 'learning_rate': 1.83249581239531e-05, 'epoch': 0.25}


  9%|▉         | 160/1791 [06:24<50:41,  1.86s/it]  

{'loss': 2.2959, 'grad_norm': 7.321610927581787, 'learning_rate': 1.8213288665549973e-05, 'epoch': 0.27}


  9%|▉         | 170/1791 [06:48<1:15:20,  2.79s/it]

{'loss': 2.2746, 'grad_norm': 6.365433216094971, 'learning_rate': 1.810161920714685e-05, 'epoch': 0.28}


 10%|█         | 180/1791 [07:12<57:52,  2.16s/it]  

{'loss': 2.2312, 'grad_norm': 7.63909912109375, 'learning_rate': 1.798994974874372e-05, 'epoch': 0.3}


 11%|█         | 190/1791 [07:34<1:06:31,  2.49s/it]

{'loss': 2.3723, 'grad_norm': 7.633241653442383, 'learning_rate': 1.7878280290340594e-05, 'epoch': 0.32}


 11%|█         | 200/1791 [07:56<54:46,  2.07s/it]  

{'loss': 2.303, 'grad_norm': 7.9633636474609375, 'learning_rate': 1.7766610831937466e-05, 'epoch': 0.34}


 12%|█▏        | 210/1791 [08:19<57:54,  2.20s/it]  

{'loss': 2.3776, 'grad_norm': 7.8838114738464355, 'learning_rate': 1.765494137353434e-05, 'epoch': 0.35}


 12%|█▏        | 220/1791 [08:42<1:09:38,  2.66s/it]

{'loss': 2.4778, 'grad_norm': 7.265848636627197, 'learning_rate': 1.7543271915131214e-05, 'epoch': 0.37}


 13%|█▎        | 230/1791 [09:08<1:08:43,  2.64s/it]

{'loss': 2.3843, 'grad_norm': 6.5001091957092285, 'learning_rate': 1.7431602456728086e-05, 'epoch': 0.39}


 13%|█▎        | 240/1791 [09:30<50:11,  1.94s/it]  

{'loss': 2.3591, 'grad_norm': 8.343018531799316, 'learning_rate': 1.731993299832496e-05, 'epoch': 0.4}


 14%|█▍        | 250/1791 [09:52<59:13,  2.31s/it]  

{'loss': 2.3953, 'grad_norm': 7.938104152679443, 'learning_rate': 1.720826353992183e-05, 'epoch': 0.42}


 15%|█▍        | 260/1791 [10:15<1:00:56,  2.39s/it]

{'loss': 2.3006, 'grad_norm': 6.291754245758057, 'learning_rate': 1.7096594081518707e-05, 'epoch': 0.44}


 15%|█▌        | 270/1791 [10:50<1:24:16,  3.32s/it]

{'loss': 2.5011, 'grad_norm': 7.071861267089844, 'learning_rate': 1.698492462311558e-05, 'epoch': 0.45}


 16%|█▌        | 280/1791 [11:15<1:10:31,  2.80s/it]

{'loss': 2.3401, 'grad_norm': 7.517034530639648, 'learning_rate': 1.687325516471245e-05, 'epoch': 0.47}


 16%|█▌        | 290/1791 [11:40<1:08:15,  2.73s/it]

{'loss': 2.3491, 'grad_norm': 5.834240913391113, 'learning_rate': 1.6761585706309327e-05, 'epoch': 0.49}


 17%|█▋        | 300/1791 [12:11<1:07:55,  2.73s/it]

{'loss': 2.5075, 'grad_norm': 6.877931118011475, 'learning_rate': 1.66499162479062e-05, 'epoch': 0.5}


 17%|█▋        | 310/1791 [12:34<1:02:36,  2.54s/it]

{'loss': 2.3901, 'grad_norm': 6.778449058532715, 'learning_rate': 1.6538246789503072e-05, 'epoch': 0.52}


 18%|█▊        | 320/1791 [12:57<57:58,  2.36s/it]  

{'loss': 2.2649, 'grad_norm': 6.428062438964844, 'learning_rate': 1.6426577331099948e-05, 'epoch': 0.54}


 18%|█▊        | 330/1791 [13:21<52:42,  2.16s/it]  

{'loss': 2.3398, 'grad_norm': 7.145291328430176, 'learning_rate': 1.631490787269682e-05, 'epoch': 0.55}


 19%|█▉        | 340/1791 [13:48<56:30,  2.34s/it]  

{'loss': 2.2341, 'grad_norm': 7.023598670959473, 'learning_rate': 1.6203238414293693e-05, 'epoch': 0.57}


 20%|█▉        | 350/1791 [14:12<52:39,  2.19s/it]  

{'loss': 2.3413, 'grad_norm': 6.735317707061768, 'learning_rate': 1.6091568955890565e-05, 'epoch': 0.59}


 20%|██        | 360/1791 [14:40<53:11,  2.23s/it]  

{'loss': 2.2846, 'grad_norm': 6.741580486297607, 'learning_rate': 1.5979899497487437e-05, 'epoch': 0.6}


 21%|██        | 370/1791 [15:03<55:17,  2.33s/it]

{'loss': 2.2713, 'grad_norm': 7.235286712646484, 'learning_rate': 1.5868230039084313e-05, 'epoch': 0.62}


 21%|██        | 380/1791 [15:26<48:01,  2.04s/it]  

{'loss': 2.2739, 'grad_norm': 6.491893291473389, 'learning_rate': 1.5756560580681185e-05, 'epoch': 0.64}


 22%|██▏       | 390/1791 [15:49<47:02,  2.01s/it]  

{'loss': 2.4574, 'grad_norm': 7.204843521118164, 'learning_rate': 1.5644891122278058e-05, 'epoch': 0.65}


 22%|██▏       | 400/1791 [16:14<1:01:01,  2.63s/it]

{'loss': 2.4297, 'grad_norm': 7.131882190704346, 'learning_rate': 1.553322166387493e-05, 'epoch': 0.67}


 23%|██▎       | 410/1791 [16:42<59:01,  2.56s/it]  

{'loss': 2.3704, 'grad_norm': 7.664714813232422, 'learning_rate': 1.5421552205471802e-05, 'epoch': 0.69}


 23%|██▎       | 420/1791 [17:08<1:01:08,  2.68s/it]

{'loss': 2.3579, 'grad_norm': 7.263792037963867, 'learning_rate': 1.5309882747068678e-05, 'epoch': 0.7}


 24%|██▍       | 430/1791 [17:30<50:51,  2.24s/it]  

{'loss': 2.2564, 'grad_norm': 7.77029275894165, 'learning_rate': 1.519821328866555e-05, 'epoch': 0.72}


 25%|██▍       | 440/1791 [17:57<1:02:16,  2.77s/it]

{'loss': 2.4331, 'grad_norm': 7.1337971687316895, 'learning_rate': 1.5086543830262423e-05, 'epoch': 0.74}


 25%|██▌       | 450/1791 [18:19<47:26,  2.12s/it]  

{'loss': 2.3038, 'grad_norm': 7.433918476104736, 'learning_rate': 1.4974874371859299e-05, 'epoch': 0.75}


 26%|██▌       | 460/1791 [18:42<45:33,  2.05s/it]

{'loss': 2.2027, 'grad_norm': 6.931824207305908, 'learning_rate': 1.4863204913456171e-05, 'epoch': 0.77}


 26%|██▌       | 470/1791 [19:06<50:17,  2.28s/it]

{'loss': 2.3279, 'grad_norm': 7.918239593505859, 'learning_rate': 1.4751535455053043e-05, 'epoch': 0.79}


 27%|██▋       | 480/1791 [19:32<50:57,  2.33s/it]  

{'loss': 2.3381, 'grad_norm': 7.008042335510254, 'learning_rate': 1.4639865996649917e-05, 'epoch': 0.8}


 27%|██▋       | 490/1791 [19:56<1:04:58,  3.00s/it]

{'loss': 2.2053, 'grad_norm': 6.4391679763793945, 'learning_rate': 1.452819653824679e-05, 'epoch': 0.82}


Non-default generation parameters: {'max_length': 512, 'num_beams': 4, 'bad_words_ids': [[62833]], 'forced_eos_token_id': 0}


{'loss': 2.2334, 'grad_norm': 6.465289115905762, 'learning_rate': 1.4416527079843662e-05, 'epoch': 0.84}


 28%|██▊       | 510/1791 [20:47<53:08,  2.49s/it]  

{'loss': 2.3601, 'grad_norm': 7.628950119018555, 'learning_rate': 1.4304857621440538e-05, 'epoch': 0.85}


 29%|██▉       | 520/1791 [21:12<55:51,  2.64s/it]

{'loss': 2.1917, 'grad_norm': 6.275561332702637, 'learning_rate': 1.419318816303741e-05, 'epoch': 0.87}


 30%|██▉       | 530/1791 [21:36<56:29,  2.69s/it]

{'loss': 2.2661, 'grad_norm': 6.418466567993164, 'learning_rate': 1.4081518704634283e-05, 'epoch': 0.89}


 30%|███       | 540/1791 [22:00<47:36,  2.28s/it]

{'loss': 2.3115, 'grad_norm': 7.16494607925415, 'learning_rate': 1.3969849246231157e-05, 'epoch': 0.9}


 31%|███       | 550/1791 [22:23<48:15,  2.33s/it]

{'loss': 2.3745, 'grad_norm': 7.352195739746094, 'learning_rate': 1.385817978782803e-05, 'epoch': 0.92}


 31%|███▏      | 560/1791 [22:52<58:35,  2.86s/it]  

{'loss': 2.2108, 'grad_norm': 6.98187780380249, 'learning_rate': 1.3746510329424903e-05, 'epoch': 0.94}


 32%|███▏      | 570/1791 [23:16<47:33,  2.34s/it]

{'loss': 2.3654, 'grad_norm': 6.376901149749756, 'learning_rate': 1.3634840871021777e-05, 'epoch': 0.95}


 32%|███▏      | 580/1791 [23:43<49:42,  2.46s/it]  

{'loss': 2.1532, 'grad_norm': 6.134869575500488, 'learning_rate': 1.352317141261865e-05, 'epoch': 0.97}


 33%|███▎      | 590/1791 [24:04<46:24,  2.32s/it]

{'loss': 2.1538, 'grad_norm': 6.684973239898682, 'learning_rate': 1.3411501954215522e-05, 'epoch': 0.99}


 34%|███▎      | 600/1791 [24:28<47:28,  2.39s/it]

{'loss': 2.1868, 'grad_norm': 6.052950382232666, 'learning_rate': 1.3299832495812398e-05, 'epoch': 1.01}


 34%|███▍      | 610/1791 [24:55<46:24,  2.36s/it]

{'loss': 2.0599, 'grad_norm': 7.165792942047119, 'learning_rate': 1.318816303740927e-05, 'epoch': 1.02}


 35%|███▍      | 620/1791 [25:18<44:48,  2.30s/it]

{'loss': 2.146, 'grad_norm': 6.631860733032227, 'learning_rate': 1.3076493579006142e-05, 'epoch': 1.04}


 35%|███▌      | 630/1791 [25:44<46:00,  2.38s/it]  

{'loss': 2.1059, 'grad_norm': 6.517052173614502, 'learning_rate': 1.2964824120603017e-05, 'epoch': 1.06}


 36%|███▌      | 640/1791 [26:10<55:59,  2.92s/it]

{'loss': 2.0871, 'grad_norm': 7.046426296234131, 'learning_rate': 1.2853154662199889e-05, 'epoch': 1.07}


 36%|███▋      | 650/1791 [26:36<55:40,  2.93s/it]

{'loss': 2.0568, 'grad_norm': 6.3145751953125, 'learning_rate': 1.2741485203796761e-05, 'epoch': 1.09}


 37%|███▋      | 660/1791 [26:58<37:25,  1.99s/it]

{'loss': 1.915, 'grad_norm': 8.52444076538086, 'learning_rate': 1.2629815745393637e-05, 'epoch': 1.11}


 37%|███▋      | 670/1791 [27:21<45:02,  2.41s/it]

{'loss': 2.0215, 'grad_norm': 5.825560092926025, 'learning_rate': 1.251814628699051e-05, 'epoch': 1.12}


 38%|███▊      | 680/1791 [27:45<41:47,  2.26s/it]

{'loss': 1.9881, 'grad_norm': 7.043364524841309, 'learning_rate': 1.2406476828587382e-05, 'epoch': 1.14}


 39%|███▊      | 690/1791 [28:08<42:54,  2.34s/it]

{'loss': 1.9585, 'grad_norm': 7.229592800140381, 'learning_rate': 1.2294807370184256e-05, 'epoch': 1.16}


 39%|███▉      | 700/1791 [28:35<46:17,  2.55s/it]

{'loss': 1.9839, 'grad_norm': 7.7545881271362305, 'learning_rate': 1.2183137911781128e-05, 'epoch': 1.17}


 40%|███▉      | 710/1791 [29:03<50:18,  2.79s/it]  

{'loss': 2.1064, 'grad_norm': 5.925206661224365, 'learning_rate': 1.2071468453378e-05, 'epoch': 1.19}


 40%|████      | 720/1791 [29:26<40:43,  2.28s/it]

{'loss': 2.058, 'grad_norm': 6.368833065032959, 'learning_rate': 1.1959798994974876e-05, 'epoch': 1.21}


 41%|████      | 730/1791 [29:51<46:50,  2.65s/it]

{'loss': 2.2614, 'grad_norm': 6.223285675048828, 'learning_rate': 1.1848129536571749e-05, 'epoch': 1.22}


 41%|████▏     | 740/1791 [30:16<45:48,  2.62s/it]

{'loss': 2.0178, 'grad_norm': 6.792660713195801, 'learning_rate': 1.1736460078168621e-05, 'epoch': 1.24}


 42%|████▏     | 750/1791 [30:40<48:48,  2.81s/it]

{'loss': 2.1523, 'grad_norm': 6.867122173309326, 'learning_rate': 1.1624790619765495e-05, 'epoch': 1.26}


 42%|████▏     | 760/1791 [31:05<42:59,  2.50s/it]

{'loss': 2.0369, 'grad_norm': 6.761822700500488, 'learning_rate': 1.1513121161362369e-05, 'epoch': 1.27}


 43%|████▎     | 770/1791 [31:31<42:31,  2.50s/it]

{'loss': 2.0588, 'grad_norm': 7.353274345397949, 'learning_rate': 1.1401451702959241e-05, 'epoch': 1.29}


 44%|████▎     | 780/1791 [31:54<42:07,  2.50s/it]

{'loss': 2.1057, 'grad_norm': 6.706447601318359, 'learning_rate': 1.1289782244556116e-05, 'epoch': 1.31}


 44%|████▍     | 790/1791 [32:21<37:09,  2.23s/it]

{'loss': 2.1965, 'grad_norm': 6.350930690765381, 'learning_rate': 1.1178112786152988e-05, 'epoch': 1.32}


 45%|████▍     | 800/1791 [32:44<35:15,  2.13s/it]

{'loss': 2.0978, 'grad_norm': 8.079146385192871, 'learning_rate': 1.106644332774986e-05, 'epoch': 1.34}


 45%|████▌     | 810/1791 [33:14<50:31,  3.09s/it]  

{'loss': 2.1206, 'grad_norm': 6.180726051330566, 'learning_rate': 1.0954773869346736e-05, 'epoch': 1.36}


 46%|████▌     | 820/1791 [33:38<39:58,  2.47s/it]

{'loss': 1.9715, 'grad_norm': 7.1324238777160645, 'learning_rate': 1.0843104410943608e-05, 'epoch': 1.37}


 46%|████▋     | 830/1791 [34:02<36:51,  2.30s/it]

{'loss': 2.0422, 'grad_norm': 6.910171031951904, 'learning_rate': 1.073143495254048e-05, 'epoch': 1.39}


 47%|████▋     | 840/1791 [34:26<35:47,  2.26s/it]

{'loss': 1.9262, 'grad_norm': 6.939675331115723, 'learning_rate': 1.0619765494137355e-05, 'epoch': 1.41}


 47%|████▋     | 850/1791 [34:54<41:14,  2.63s/it]

{'loss': 2.0972, 'grad_norm': 6.742696762084961, 'learning_rate': 1.0508096035734227e-05, 'epoch': 1.42}


 48%|████▊     | 860/1791 [35:16<37:20,  2.41s/it]

{'loss': 1.9819, 'grad_norm': 6.04594087600708, 'learning_rate': 1.03964265773311e-05, 'epoch': 1.44}


 49%|████▊     | 870/1791 [35:40<34:49,  2.27s/it]

{'loss': 2.0453, 'grad_norm': 6.040597438812256, 'learning_rate': 1.0284757118927975e-05, 'epoch': 1.46}


 49%|████▉     | 880/1791 [36:01<34:31,  2.27s/it]

{'loss': 1.8781, 'grad_norm': 6.050278663635254, 'learning_rate': 1.0173087660524848e-05, 'epoch': 1.47}


 50%|████▉     | 890/1791 [36:31<45:33,  3.03s/it]

{'loss': 2.0568, 'grad_norm': 6.408959865570068, 'learning_rate': 1.006141820212172e-05, 'epoch': 1.49}


 50%|█████     | 900/1791 [36:53<31:47,  2.14s/it]

{'loss': 1.8823, 'grad_norm': 6.673653602600098, 'learning_rate': 9.949748743718594e-06, 'epoch': 1.51}


 51%|█████     | 910/1791 [37:14<29:08,  1.99s/it]

{'loss': 1.9282, 'grad_norm': 6.555959701538086, 'learning_rate': 9.838079285315466e-06, 'epoch': 1.52}


 51%|█████▏    | 920/1791 [37:41<40:27,  2.79s/it]

{'loss': 1.9809, 'grad_norm': 6.218725204467773, 'learning_rate': 9.72640982691234e-06, 'epoch': 1.54}


 52%|█████▏    | 930/1791 [38:04<34:41,  2.42s/it]

{'loss': 1.9351, 'grad_norm': 6.084456920623779, 'learning_rate': 9.614740368509213e-06, 'epoch': 1.56}


 52%|█████▏    | 940/1791 [38:29<34:42,  2.45s/it]

{'loss': 1.9796, 'grad_norm': 7.285674571990967, 'learning_rate': 9.503070910106087e-06, 'epoch': 1.57}


 53%|█████▎    | 950/1791 [38:51<31:08,  2.22s/it]

{'loss': 1.9302, 'grad_norm': 8.568098068237305, 'learning_rate': 9.39140145170296e-06, 'epoch': 1.59}


 54%|█████▎    | 960/1791 [39:14<35:37,  2.57s/it]

{'loss': 2.0986, 'grad_norm': 6.426123142242432, 'learning_rate': 9.279731993299833e-06, 'epoch': 1.61}


 54%|█████▍    | 970/1791 [39:39<37:26,  2.74s/it]

{'loss': 1.9856, 'grad_norm': 6.871554374694824, 'learning_rate': 9.168062534896707e-06, 'epoch': 1.62}


 55%|█████▍    | 980/1791 [40:07<33:27,  2.47s/it]

{'loss': 1.958, 'grad_norm': 6.473342418670654, 'learning_rate': 9.05639307649358e-06, 'epoch': 1.64}


 55%|█████▌    | 990/1791 [40:34<39:49,  2.98s/it]

{'loss': 2.1901, 'grad_norm': 6.803134918212891, 'learning_rate': 8.944723618090452e-06, 'epoch': 1.66}


Non-default generation parameters: {'max_length': 512, 'num_beams': 4, 'bad_words_ids': [[62833]], 'forced_eos_token_id': 0}


{'loss': 1.9743, 'grad_norm': 7.23629903793335, 'learning_rate': 8.833054159687326e-06, 'epoch': 1.68}


 56%|█████▋    | 1010/1791 [41:25<31:12,  2.40s/it]

{'loss': 2.0675, 'grad_norm': 6.303682804107666, 'learning_rate': 8.721384701284199e-06, 'epoch': 1.69}


 57%|█████▋    | 1020/1791 [41:52<35:11,  2.74s/it]

{'loss': 2.1221, 'grad_norm': 6.2769598960876465, 'learning_rate': 8.609715242881073e-06, 'epoch': 1.71}


 58%|█████▊    | 1030/1791 [42:17<34:27,  2.72s/it]

{'loss': 1.8855, 'grad_norm': 5.839977741241455, 'learning_rate': 8.498045784477947e-06, 'epoch': 1.73}


 58%|█████▊    | 1040/1791 [42:46<34:44,  2.78s/it]

{'loss': 2.061, 'grad_norm': 6.8747382164001465, 'learning_rate': 8.386376326074819e-06, 'epoch': 1.74}


 59%|█████▊    | 1050/1791 [43:13<35:06,  2.84s/it]

{'loss': 2.0993, 'grad_norm': 7.340085029602051, 'learning_rate': 8.274706867671693e-06, 'epoch': 1.76}


 59%|█████▉    | 1060/1791 [43:38<30:09,  2.48s/it]

{'loss': 2.083, 'grad_norm': 7.21161413192749, 'learning_rate': 8.163037409268565e-06, 'epoch': 1.78}


 60%|█████▉    | 1070/1791 [43:59<24:07,  2.01s/it]

{'loss': 1.9033, 'grad_norm': 6.923187732696533, 'learning_rate': 8.051367950865438e-06, 'epoch': 1.79}


 60%|██████    | 1080/1791 [44:22<27:09,  2.29s/it]

{'loss': 2.0281, 'grad_norm': 6.870092868804932, 'learning_rate': 7.939698492462312e-06, 'epoch': 1.81}


 61%|██████    | 1090/1791 [44:44<26:18,  2.25s/it]

{'loss': 1.9449, 'grad_norm': 6.469162940979004, 'learning_rate': 7.828029034059186e-06, 'epoch': 1.83}


 61%|██████▏   | 1100/1791 [45:08<25:06,  2.18s/it]

{'loss': 1.9801, 'grad_norm': 6.874406337738037, 'learning_rate': 7.716359575656058e-06, 'epoch': 1.84}


 62%|██████▏   | 1110/1791 [45:32<28:14,  2.49s/it]

{'loss': 1.8143, 'grad_norm': 6.436646461486816, 'learning_rate': 7.604690117252932e-06, 'epoch': 1.86}


 63%|██████▎   | 1120/1791 [45:55<24:54,  2.23s/it]

{'loss': 2.066, 'grad_norm': 6.579381942749023, 'learning_rate': 7.4930206588498056e-06, 'epoch': 1.88}


 63%|██████▎   | 1130/1791 [46:21<29:25,  2.67s/it]

{'loss': 1.9871, 'grad_norm': 6.029115200042725, 'learning_rate': 7.381351200446678e-06, 'epoch': 1.89}


 64%|██████▎   | 1140/1791 [46:46<26:44,  2.46s/it]

{'loss': 1.9985, 'grad_norm': 7.261022567749023, 'learning_rate': 7.269681742043552e-06, 'epoch': 1.91}


 64%|██████▍   | 1150/1791 [47:12<29:22,  2.75s/it]

{'loss': 2.0138, 'grad_norm': 6.973844528198242, 'learning_rate': 7.158012283640425e-06, 'epoch': 1.93}


 65%|██████▍   | 1160/1791 [47:38<25:16,  2.40s/it]

{'loss': 1.9674, 'grad_norm': 7.103800296783447, 'learning_rate': 7.0463428252372976e-06, 'epoch': 1.94}


 65%|██████▌   | 1170/1791 [48:02<25:59,  2.51s/it]

{'loss': 2.0201, 'grad_norm': 6.516567707061768, 'learning_rate': 6.934673366834172e-06, 'epoch': 1.96}


 66%|██████▌   | 1180/1791 [48:25<22:54,  2.25s/it]

{'loss': 1.9876, 'grad_norm': 6.455190181732178, 'learning_rate': 6.823003908431045e-06, 'epoch': 1.98}


 66%|██████▋   | 1190/1791 [48:51<24:03,  2.40s/it]

{'loss': 2.0002, 'grad_norm': 7.64526891708374, 'learning_rate': 6.711334450027917e-06, 'epoch': 1.99}


 67%|██████▋   | 1200/1791 [49:15<21:48,  2.21s/it]

{'loss': 1.9513, 'grad_norm': 7.316224098205566, 'learning_rate': 6.599664991624791e-06, 'epoch': 2.01}


 68%|██████▊   | 1210/1791 [49:43<25:34,  2.64s/it]

{'loss': 1.7665, 'grad_norm': 6.809199333190918, 'learning_rate': 6.4879955332216645e-06, 'epoch': 2.03}


 68%|██████▊   | 1220/1791 [50:08<24:39,  2.59s/it]

{'loss': 1.8919, 'grad_norm': 5.68342924118042, 'learning_rate': 6.376326074818538e-06, 'epoch': 2.04}


 69%|██████▊   | 1230/1791 [50:36<23:55,  2.56s/it]

{'loss': 1.8767, 'grad_norm': 6.822322845458984, 'learning_rate': 6.264656616415411e-06, 'epoch': 2.06}


 69%|██████▉   | 1240/1791 [51:00<23:05,  2.52s/it]

{'loss': 1.9012, 'grad_norm': 6.985029697418213, 'learning_rate': 6.152987158012285e-06, 'epoch': 2.08}


 70%|██████▉   | 1250/1791 [51:27<24:00,  2.66s/it]

{'loss': 1.9929, 'grad_norm': 6.917954921722412, 'learning_rate': 6.041317699609157e-06, 'epoch': 2.09}


 70%|███████   | 1260/1791 [51:53<22:40,  2.56s/it]

{'loss': 1.9416, 'grad_norm': 6.026356220245361, 'learning_rate': 5.9296482412060305e-06, 'epoch': 2.11}


 71%|███████   | 1270/1791 [52:20<25:36,  2.95s/it]

{'loss': 1.8665, 'grad_norm': 6.125023365020752, 'learning_rate': 5.817978782802905e-06, 'epoch': 2.13}


 71%|███████▏  | 1280/1791 [52:48<27:15,  3.20s/it]

{'loss': 2.0371, 'grad_norm': 6.388654708862305, 'learning_rate': 5.706309324399777e-06, 'epoch': 2.14}


 72%|███████▏  | 1290/1791 [53:13<18:55,  2.27s/it]

{'loss': 1.9519, 'grad_norm': 6.379548072814941, 'learning_rate': 5.59463986599665e-06, 'epoch': 2.16}


 73%|███████▎  | 1300/1791 [53:36<18:24,  2.25s/it]

{'loss': 1.9939, 'grad_norm': 7.662683010101318, 'learning_rate': 5.482970407593524e-06, 'epoch': 2.18}


 73%|███████▎  | 1310/1791 [54:00<17:22,  2.17s/it]

{'loss': 2.06, 'grad_norm': 6.47686767578125, 'learning_rate': 5.371300949190397e-06, 'epoch': 2.19}


 74%|███████▎  | 1320/1791 [54:24<16:54,  2.15s/it]

{'loss': 1.957, 'grad_norm': 7.036011219024658, 'learning_rate': 5.259631490787271e-06, 'epoch': 2.21}


 74%|███████▍  | 1330/1791 [54:44<16:18,  2.12s/it]

{'loss': 1.8578, 'grad_norm': 6.391092300415039, 'learning_rate': 5.147962032384143e-06, 'epoch': 2.23}


 75%|███████▍  | 1340/1791 [55:05<14:37,  1.95s/it]

{'loss': 1.8597, 'grad_norm': 6.159343242645264, 'learning_rate': 5.036292573981016e-06, 'epoch': 2.24}


 75%|███████▌  | 1350/1791 [55:27<15:02,  2.05s/it]

{'loss': 2.0081, 'grad_norm': 5.807157039642334, 'learning_rate': 4.92462311557789e-06, 'epoch': 2.26}


 76%|███████▌  | 1360/1791 [55:49<14:33,  2.03s/it]

{'loss': 1.8174, 'grad_norm': 6.798040390014648, 'learning_rate': 4.8129536571747635e-06, 'epoch': 2.28}


 76%|███████▋  | 1370/1791 [56:12<14:30,  2.07s/it]

{'loss': 1.7179, 'grad_norm': 7.095826148986816, 'learning_rate': 4.701284198771636e-06, 'epoch': 2.29}


 77%|███████▋  | 1380/1791 [56:43<19:17,  2.82s/it]

{'loss': 1.8323, 'grad_norm': 7.654604434967041, 'learning_rate': 4.58961474036851e-06, 'epoch': 2.31}


 78%|███████▊  | 1390/1791 [57:11<19:00,  2.84s/it]

{'loss': 2.0587, 'grad_norm': 6.2026848793029785, 'learning_rate': 4.477945281965383e-06, 'epoch': 2.33}


 78%|███████▊  | 1400/1791 [57:37<17:35,  2.70s/it]

{'loss': 1.7805, 'grad_norm': 6.290980815887451, 'learning_rate': 4.366275823562256e-06, 'epoch': 2.35}


 79%|███████▊  | 1410/1791 [58:08<18:21,  2.89s/it]

{'loss': 1.7712, 'grad_norm': 6.111023902893066, 'learning_rate': 4.254606365159129e-06, 'epoch': 2.36}


 79%|███████▉  | 1420/1791 [58:39<20:11,  3.27s/it]

{'loss': 1.7943, 'grad_norm': 6.9281487464904785, 'learning_rate': 4.142936906756003e-06, 'epoch': 2.38}


 80%|███████▉  | 1430/1791 [59:11<16:37,  2.76s/it]

{'loss': 2.0353, 'grad_norm': 7.653488636016846, 'learning_rate': 4.031267448352876e-06, 'epoch': 2.4}


 80%|████████  | 1440/1791 [59:42<17:19,  2.96s/it]

{'loss': 1.8408, 'grad_norm': 5.526890754699707, 'learning_rate': 3.919597989949749e-06, 'epoch': 2.41}


 81%|████████  | 1450/1791 [1:00:06<13:25,  2.36s/it]

{'loss': 1.6618, 'grad_norm': 6.826206684112549, 'learning_rate': 3.8079285315466224e-06, 'epoch': 2.43}


 82%|████████▏ | 1460/1791 [1:00:36<15:13,  2.76s/it]

{'loss': 1.8791, 'grad_norm': 6.900865077972412, 'learning_rate': 3.6962590731434956e-06, 'epoch': 2.45}


 82%|████████▏ | 1470/1791 [1:01:03<13:12,  2.47s/it]

{'loss': 1.9, 'grad_norm': 6.815740585327148, 'learning_rate': 3.5845896147403684e-06, 'epoch': 2.46}


 83%|████████▎ | 1480/1791 [1:01:28<12:25,  2.40s/it]

{'loss': 1.8397, 'grad_norm': 6.570197105407715, 'learning_rate': 3.472920156337242e-06, 'epoch': 2.48}


 83%|████████▎ | 1490/1791 [1:01:57<14:57,  2.98s/it]

{'loss': 1.8617, 'grad_norm': 6.274089813232422, 'learning_rate': 3.3612506979341152e-06, 'epoch': 2.5}


Non-default generation parameters: {'max_length': 512, 'num_beams': 4, 'bad_words_ids': [[62833]], 'forced_eos_token_id': 0}


{'loss': 1.8042, 'grad_norm': 6.925150394439697, 'learning_rate': 3.2495812395309884e-06, 'epoch': 2.51}


 84%|████████▍ | 1510/1791 [1:02:54<13:34,  2.90s/it]

{'loss': 1.9126, 'grad_norm': 6.605889320373535, 'learning_rate': 3.137911781127862e-06, 'epoch': 2.53}


 85%|████████▍ | 1520/1791 [1:03:21<13:01,  2.88s/it]

{'loss': 1.8843, 'grad_norm': 8.044941902160645, 'learning_rate': 3.026242322724735e-06, 'epoch': 2.55}


 85%|████████▌ | 1530/1791 [1:03:47<11:33,  2.66s/it]

{'loss': 1.7853, 'grad_norm': 6.2023797035217285, 'learning_rate': 2.914572864321608e-06, 'epoch': 2.56}


 86%|████████▌ | 1540/1791 [1:04:15<10:57,  2.62s/it]

{'loss': 1.8479, 'grad_norm': 6.250741958618164, 'learning_rate': 2.8029034059184817e-06, 'epoch': 2.58}


 87%|████████▋ | 1550/1791 [1:04:42<10:57,  2.73s/it]

{'loss': 1.8128, 'grad_norm': 7.040643215179443, 'learning_rate': 2.691233947515355e-06, 'epoch': 2.6}


 87%|████████▋ | 1560/1791 [1:05:11<12:17,  3.19s/it]

{'loss': 1.7777, 'grad_norm': 6.118005275726318, 'learning_rate': 2.5795644891122277e-06, 'epoch': 2.61}


 88%|████████▊ | 1570/1791 [1:05:36<08:04,  2.19s/it]

{'loss': 1.8682, 'grad_norm': 6.782948970794678, 'learning_rate': 2.4678950307091013e-06, 'epoch': 2.63}


 88%|████████▊ | 1580/1791 [1:05:56<06:28,  1.84s/it]

{'loss': 1.9271, 'grad_norm': 6.203070640563965, 'learning_rate': 2.3562255723059746e-06, 'epoch': 2.65}


 89%|████████▉ | 1590/1791 [1:06:19<07:40,  2.29s/it]

{'loss': 1.9052, 'grad_norm': 6.399803161621094, 'learning_rate': 2.2445561139028478e-06, 'epoch': 2.66}


 89%|████████▉ | 1600/1791 [1:06:37<05:55,  1.86s/it]

{'loss': 1.8692, 'grad_norm': 6.574288368225098, 'learning_rate': 2.132886655499721e-06, 'epoch': 2.68}


 90%|████████▉ | 1610/1791 [1:06:58<05:50,  1.94s/it]

{'loss': 1.7779, 'grad_norm': 6.774652481079102, 'learning_rate': 2.021217197096594e-06, 'epoch': 2.7}


 90%|█████████ | 1620/1791 [1:07:16<05:10,  1.82s/it]

{'loss': 1.7522, 'grad_norm': 6.465766429901123, 'learning_rate': 1.9095477386934674e-06, 'epoch': 2.71}


 91%|█████████ | 1630/1791 [1:07:38<05:28,  2.04s/it]

{'loss': 1.8347, 'grad_norm': 7.091512203216553, 'learning_rate': 1.7978782802903408e-06, 'epoch': 2.73}


 92%|█████████▏| 1640/1791 [1:07:57<04:43,  1.88s/it]

{'loss': 1.8994, 'grad_norm': 6.980851173400879, 'learning_rate': 1.6862088218872138e-06, 'epoch': 2.75}


 92%|█████████▏| 1650/1791 [1:08:18<04:21,  1.85s/it]

{'loss': 1.9023, 'grad_norm': 7.210748672485352, 'learning_rate': 1.5745393634840873e-06, 'epoch': 2.76}


 93%|█████████▎| 1660/1791 [1:08:41<05:14,  2.40s/it]

{'loss': 1.9914, 'grad_norm': 6.382097244262695, 'learning_rate': 1.4628699050809605e-06, 'epoch': 2.78}


 93%|█████████▎| 1670/1791 [1:09:12<05:53,  2.92s/it]

{'loss': 1.9089, 'grad_norm': 6.516096115112305, 'learning_rate': 1.3512004466778337e-06, 'epoch': 2.8}


 94%|█████████▍| 1680/1791 [1:09:41<05:56,  3.21s/it]

{'loss': 2.0049, 'grad_norm': 6.034126281738281, 'learning_rate': 1.2395309882747069e-06, 'epoch': 2.81}


 94%|█████████▍| 1690/1791 [1:10:09<04:23,  2.61s/it]

{'loss': 1.8111, 'grad_norm': 7.004663467407227, 'learning_rate': 1.12786152987158e-06, 'epoch': 2.83}


 95%|█████████▍| 1700/1791 [1:10:37<04:25,  2.92s/it]

{'loss': 1.9684, 'grad_norm': 6.755908489227295, 'learning_rate': 1.0161920714684535e-06, 'epoch': 2.85}


 95%|█████████▌| 1710/1791 [1:11:02<03:01,  2.24s/it]

{'loss': 1.7612, 'grad_norm': 6.5408501625061035, 'learning_rate': 9.045226130653267e-07, 'epoch': 2.86}


 96%|█████████▌| 1720/1791 [1:11:32<03:42,  3.14s/it]

{'loss': 1.9294, 'grad_norm': 6.236886501312256, 'learning_rate': 7.928531546621999e-07, 'epoch': 2.88}


 97%|█████████▋| 1730/1791 [1:12:02<02:30,  2.47s/it]

{'loss': 1.9327, 'grad_norm': 6.624107837677002, 'learning_rate': 6.811836962590732e-07, 'epoch': 2.9}


 97%|█████████▋| 1740/1791 [1:12:30<02:25,  2.86s/it]

{'loss': 1.8997, 'grad_norm': 6.346041679382324, 'learning_rate': 5.695142378559465e-07, 'epoch': 2.91}


 98%|█████████▊| 1750/1791 [1:12:57<02:09,  3.16s/it]

{'loss': 1.8125, 'grad_norm': 6.603186130523682, 'learning_rate': 4.5784477945281974e-07, 'epoch': 2.93}


 98%|█████████▊| 1760/1791 [1:13:24<01:16,  2.48s/it]

{'loss': 1.7985, 'grad_norm': 6.95505952835083, 'learning_rate': 3.4617532104969295e-07, 'epoch': 2.95}


 99%|█████████▉| 1770/1791 [1:13:55<01:03,  3.04s/it]

{'loss': 1.903, 'grad_norm': 6.684258937835693, 'learning_rate': 2.3450586264656616e-07, 'epoch': 2.96}


 99%|█████████▉| 1780/1791 [1:14:23<00:28,  2.61s/it]

{'loss': 1.8776, 'grad_norm': 6.185474872589111, 'learning_rate': 1.2283640424343942e-07, 'epoch': 2.98}


100%|█████████▉| 1790/1791 [1:14:52<00:02,  2.76s/it]

{'loss': 1.9577, 'grad_norm': 5.891530513763428, 'learning_rate': 1.1166945840312676e-08, 'epoch': 3.0}


100%|██████████| 1791/1791 [1:14:54<00:00,  2.51s/it]

{'train_runtime': 4494.2908, 'train_samples_per_second': 6.369, 'train_steps_per_second': 0.399, 'train_loss': 2.092758018999776, 'epoch': 3.0}





TrainOutput(global_step=1791, training_loss=2.092758018999776, metrics={'train_runtime': 4494.2908, 'train_samples_per_second': 6.369, 'train_steps_per_second': 0.399, 'train_loss': 2.092758018999776, 'epoch': 3.0})

In [17]:
# Save the model 
trainer.save_model('fineTunedHelsinki(16-2e-5-3)')

Non-default generation parameters: {'max_length': 512, 'num_beams': 4, 'bad_words_ids': [[62833]], 'forced_eos_token_id': 0}


### Evaluate the model

In [18]:
evaluation = trainer.evaluate(test_dataset)
print(evaluation)

100%|██████████| 299/299 [35:30<00:00,  7.13s/it] 

{'eval_loss': 2.0772271156311035, 'eval_bleu': 14.9882, 'eval_gen_len': 12.6119, 'eval_runtime': 2133.3137, 'eval_samples_per_second': 2.237, 'eval_steps_per_second': 0.14, 'epoch': 3.0}





In [19]:
example1 = 'ازيك ايه الاخبار؟'
example2= "مش فاضي. عندي امتحان بكرة"
example3 = "مسافر يوم السبت الجاي"

In [20]:
from transformers import pipeline
translator = pipeline("translation", model = 'fineTunedHelsinki(16-2e-5-3)')
print(translator(example1))
print(translator(example2))
print(translator(example3))

[{'translation_text': 'How are you doing?'}]
[{'translation_text': "I'm not available. I have a test tomorrow."}]
[{'translation_text': "He's going away on Saturday."}]
