In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from transformers import MBart50TokenizerFast,MBartForConditionalGeneration,Seq2SeqTrainingArguments, DataCollatorForSeq2Seq, Seq2SeqTrainer
from datasets import load_dataset, load_metric
import numpy as np
import evaluate

In [3]:
raw_datasets = load_dataset("Helsinki-NLP/opus-100", "en-id")
model_mbart = 'facebook/mbart-large-50-one-to-many-mmt'

In [4]:
tokenizer = MBart50TokenizerFast.from_pretrained(model_mbart,src_lang="en_XX",tgt_lang = "id_ID")

In [5]:
source_lang = "en"
target_lang = "id"

def preprocess(data):
  inputs = [dt[source_lang] for dt in data["translation"]]
  targets = [dt[target_lang] for dt in data["translation"]]
  model_inputs = tokenizer(inputs, truncation=True)

  with tokenizer.as_target_tokenizer():
    labels = tokenizer(targets, truncation=True)
  model_inputs["labels"] = labels["input_ids"]
  return model_inputs

tokenized_datasets = raw_datasets.map(preprocess, batched=True)

In [6]:
# Shuffle the train dataset and select 25% of it
small_train_dataset = (
    tokenized_datasets["train"]
    .shuffle(seed=42)
    .select(range(len(tokenized_datasets["train"]) // 10))
)

# Shuffle the test dataset and select 25% of it
small_eval_dataset = (
    tokenized_datasets["test"]
    .shuffle(seed=42)
    .select(range(len(tokenized_datasets["test"]) // 10))
)

In [7]:
len(small_train_dataset)

100000

In [8]:
model = MBartForConditionalGeneration.from_pretrained(model_mbart)
model = model.cuda()

In [9]:
hyperparameters = {
    'learning_rate': 1e-5,
    'batch_size': 8,
    'num_epochs': 5
}

args = Seq2SeqTrainingArguments(
    f"mbart-large-50-one-to-many-mmt-finetuned-en-to-id",
    evaluation_strategy="epoch",
    learning_rate=hyperparameters['learning_rate'],
    per_device_train_batch_size=hyperparameters['batch_size'],
    per_device_eval_batch_size=hyperparameters['batch_size'],
    weight_decay=0.01,
    save_total_limit=hyperparameters['num_epochs'],
    num_train_epochs=hyperparameters['num_epochs'],
    predict_with_generate=True,
)

In [10]:
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [11]:
metric = evaluate.load("sacrebleu")
meteor = evaluate.load('meteor')

def postprocess_text(preds, labels):
    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]
    return preds, labels


def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    # Some simple post-processing
    decoded_preds, decoded_labels = postprocess_text(
        decoded_preds, decoded_labels)
    result = metric.compute(predictions=decoded_preds,
                            references=decoded_labels)
    meteor_result = meteor.compute(
        predictions=decoded_preds, references=decoded_labels)
    prediction_lens = [np.count_nonzero(
        pred != tokenizer.pad_token_id) for pred in preds]
    result = {'bleu': result['score']}
    result["gen_len"] = np.mean(prediction_lens)
    result["meteor"] = meteor_result["meteor"]
    result = {k: round(v, 4) for k, v in result.items()}
    return result

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [12]:
trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)
trainer.train()

  0%|          | 0/62500 [00:00<?, ?it/s]

Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 2.0911, 'grad_norm': 5.785200595855713, 'learning_rate': 9.920000000000002e-06, 'epoch': 0.04}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.8662, 'grad_norm': 6.672329902648926, 'learning_rate': 9.84e-06, 'epoch': 0.08}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.841, 'grad_norm': 5.563478469848633, 'learning_rate': 9.760000000000001e-06, 'epoch': 0.12}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.82, 'grad_norm': 6.603244781494141, 'learning_rate': 9.68e-06, 'epoch': 0.16}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.745, 'grad_norm': 6.93386173248291, 'learning_rate': 9.600000000000001e-06, 'epoch': 0.2}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.8358, 'grad_norm': 5.9870123863220215, 'learning_rate': 9.52e-06, 'epoch': 0.24}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.757, 'grad_norm': 6.331268310546875, 'learning_rate': 9.440000000000001e-06, 'epoch': 0.28}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.7408, 'grad_norm': 4.707740306854248, 'learning_rate': 9.360000000000002e-06, 'epoch': 0.32}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.7062, 'grad_norm': 5.813915252685547, 'learning_rate': 9.280000000000001e-06, 'epoch': 0.36}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.7191, 'grad_norm': 5.343817710876465, 'learning_rate': 9.200000000000002e-06, 'epoch': 0.4}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.7025, 'grad_norm': 5.883517265319824, 'learning_rate': 9.12e-06, 'epoch': 0.44}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.7138, 'grad_norm': 5.167580604553223, 'learning_rate': 9.040000000000002e-06, 'epoch': 0.48}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.7301, 'grad_norm': 6.895096778869629, 'learning_rate': 8.96e-06, 'epoch': 0.52}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.7222, 'grad_norm': 5.092913627624512, 'learning_rate': 8.880000000000001e-06, 'epoch': 0.56}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.7197, 'grad_norm': 4.389745235443115, 'learning_rate': 8.8e-06, 'epoch': 0.6}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.6697, 'grad_norm': 7.049737930297852, 'learning_rate': 8.720000000000001e-06, 'epoch': 0.64}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.6998, 'grad_norm': 4.432128429412842, 'learning_rate': 8.64e-06, 'epoch': 0.68}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.6558, 'grad_norm': 4.955327033996582, 'learning_rate': 8.560000000000001e-06, 'epoch': 0.72}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.659, 'grad_norm': 3.889341354370117, 'learning_rate': 8.48e-06, 'epoch': 0.76}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.6482, 'grad_norm': 6.007124900817871, 'learning_rate': 8.400000000000001e-06, 'epoch': 0.8}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.6161, 'grad_norm': 6.678225994110107, 'learning_rate': 8.32e-06, 'epoch': 0.84}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.6259, 'grad_norm': 4.757007598876953, 'learning_rate': 8.24e-06, 'epoch': 0.88}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.6267, 'grad_norm': 4.747682571411133, 'learning_rate': 8.16e-06, 'epoch': 0.92}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.6951, 'grad_norm': 6.062472343444824, 'learning_rate': 8.08e-06, 'epoch': 0.96}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.654, 'grad_norm': 4.377899646759033, 'learning_rate': 8.000000000000001e-06, 'epoch': 1.0}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 1.4386142492294312, 'eval_bleu': 28.5249, 'eval_gen_len': 12.385, 'eval_meteor': 0.5572, 'eval_runtime': 14.901, 'eval_samples_per_second': 13.422, 'eval_steps_per_second': 1.678, 'epoch': 1.0}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.408, 'grad_norm': 5.833208084106445, 'learning_rate': 7.92e-06, 'epoch': 1.04}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4205, 'grad_norm': 5.396154403686523, 'learning_rate': 7.840000000000001e-06, 'epoch': 1.08}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4291, 'grad_norm': 7.266521453857422, 'learning_rate': 7.76e-06, 'epoch': 1.12}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4274, 'grad_norm': 5.637810230255127, 'learning_rate': 7.680000000000001e-06, 'epoch': 1.16}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4314, 'grad_norm': 4.704964637756348, 'learning_rate': 7.600000000000001e-06, 'epoch': 1.2}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4117, 'grad_norm': 5.624796390533447, 'learning_rate': 7.520000000000001e-06, 'epoch': 1.24}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4387, 'grad_norm': 4.93252420425415, 'learning_rate': 7.440000000000001e-06, 'epoch': 1.28}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.398, 'grad_norm': 8.428054809570312, 'learning_rate': 7.360000000000001e-06, 'epoch': 1.32}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4127, 'grad_norm': 7.364628314971924, 'learning_rate': 7.280000000000001e-06, 'epoch': 1.36}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4362, 'grad_norm': 6.03603458404541, 'learning_rate': 7.2000000000000005e-06, 'epoch': 1.4}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4332, 'grad_norm': 5.740499973297119, 'learning_rate': 7.1200000000000004e-06, 'epoch': 1.44}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4197, 'grad_norm': 7.087905406951904, 'learning_rate': 7.04e-06, 'epoch': 1.48}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4168, 'grad_norm': 4.217529296875, 'learning_rate': 6.96e-06, 'epoch': 1.52}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.3869, 'grad_norm': 4.917983055114746, 'learning_rate': 6.88e-06, 'epoch': 1.56}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4128, 'grad_norm': 5.33995246887207, 'learning_rate': 6.800000000000001e-06, 'epoch': 1.6}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.3988, 'grad_norm': 5.98928689956665, 'learning_rate': 6.720000000000001e-06, 'epoch': 1.64}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4201, 'grad_norm': 6.481674671173096, 'learning_rate': 6.640000000000001e-06, 'epoch': 1.68}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4321, 'grad_norm': 6.211757183074951, 'learning_rate': 6.560000000000001e-06, 'epoch': 1.72}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4053, 'grad_norm': 6.039905071258545, 'learning_rate': 6.480000000000001e-06, 'epoch': 1.76}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4021, 'grad_norm': 3.972519874572754, 'learning_rate': 6.4000000000000006e-06, 'epoch': 1.8}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4049, 'grad_norm': 4.404269695281982, 'learning_rate': 6.3200000000000005e-06, 'epoch': 1.84}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.3633, 'grad_norm': 6.1989922523498535, 'learning_rate': 6.24e-06, 'epoch': 1.88}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.3697, 'grad_norm': 6.233440399169922, 'learning_rate': 6.16e-06, 'epoch': 1.92}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.4291, 'grad_norm': 6.4355878829956055, 'learning_rate': 6.08e-06, 'epoch': 1.96}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.423, 'grad_norm': 7.105843544006348, 'learning_rate': 6e-06, 'epoch': 2.0}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 1.3860807418823242, 'eval_bleu': 30.7407, 'eval_gen_len': 12.545, 'eval_meteor': 0.5504, 'eval_runtime': 10.8942, 'eval_samples_per_second': 18.358, 'eval_steps_per_second': 2.295, 'epoch': 2.0}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2401, 'grad_norm': 5.1252288818359375, 'learning_rate': 5.92e-06, 'epoch': 2.04}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.211, 'grad_norm': 7.381907939910889, 'learning_rate': 5.84e-06, 'epoch': 2.08}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2469, 'grad_norm': 5.554359436035156, 'learning_rate': 5.76e-06, 'epoch': 2.12}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2419, 'grad_norm': 5.517675399780273, 'learning_rate': 5.68e-06, 'epoch': 2.16}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2667, 'grad_norm': 7.228593826293945, 'learning_rate': 5.600000000000001e-06, 'epoch': 2.2}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2343, 'grad_norm': 6.304991722106934, 'learning_rate': 5.5200000000000005e-06, 'epoch': 2.24}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2336, 'grad_norm': 5.836729526519775, 'learning_rate': 5.4400000000000004e-06, 'epoch': 2.28}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2287, 'grad_norm': 5.930023670196533, 'learning_rate': 5.36e-06, 'epoch': 2.32}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2329, 'grad_norm': 7.051709175109863, 'learning_rate': 5.28e-06, 'epoch': 2.36}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2644, 'grad_norm': 5.195097923278809, 'learning_rate': 5.2e-06, 'epoch': 2.4}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2343, 'grad_norm': 4.812397003173828, 'learning_rate': 5.12e-06, 'epoch': 2.44}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2415, 'grad_norm': 7.294610023498535, 'learning_rate': 5.04e-06, 'epoch': 2.48}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2156, 'grad_norm': 6.766539096832275, 'learning_rate': 4.960000000000001e-06, 'epoch': 2.52}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2829, 'grad_norm': 7.21762752532959, 'learning_rate': 4.880000000000001e-06, 'epoch': 2.56}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2609, 'grad_norm': 5.24227237701416, 'learning_rate': 4.800000000000001e-06, 'epoch': 2.6}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2523, 'grad_norm': 6.147790908813477, 'learning_rate': 4.7200000000000005e-06, 'epoch': 2.64}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2321, 'grad_norm': 5.521937847137451, 'learning_rate': 4.6400000000000005e-06, 'epoch': 2.68}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.239, 'grad_norm': 10.362906455993652, 'learning_rate': 4.56e-06, 'epoch': 2.72}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2414, 'grad_norm': 6.603399276733398, 'learning_rate': 4.48e-06, 'epoch': 2.76}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2211, 'grad_norm': 7.9613237380981445, 'learning_rate': 4.4e-06, 'epoch': 2.8}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1843, 'grad_norm': 5.6390228271484375, 'learning_rate': 4.32e-06, 'epoch': 2.84}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2351, 'grad_norm': 5.652475833892822, 'learning_rate': 4.24e-06, 'epoch': 2.88}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2465, 'grad_norm': 7.611330509185791, 'learning_rate': 4.16e-06, 'epoch': 2.92}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2453, 'grad_norm': 5.6891093254089355, 'learning_rate': 4.08e-06, 'epoch': 2.96}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.2173, 'grad_norm': 5.021235466003418, 'learning_rate': 4.000000000000001e-06, 'epoch': 3.0}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 1.4023678302764893, 'eval_bleu': 29.1211, 'eval_gen_len': 12.52, 'eval_meteor': 0.5423, 'eval_runtime': 11.5183, 'eval_samples_per_second': 17.364, 'eval_steps_per_second': 2.17, 'epoch': 3.0}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1068, 'grad_norm': 6.582466125488281, 'learning_rate': 3.920000000000001e-06, 'epoch': 3.04}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1125, 'grad_norm': 6.2479448318481445, 'learning_rate': 3.8400000000000005e-06, 'epoch': 3.08}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1127, 'grad_norm': 5.382486343383789, 'learning_rate': 3.7600000000000004e-06, 'epoch': 3.12}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1013, 'grad_norm': 7.1078081130981445, 'learning_rate': 3.6800000000000003e-06, 'epoch': 3.16}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0759, 'grad_norm': 6.109019756317139, 'learning_rate': 3.6000000000000003e-06, 'epoch': 3.2}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.106, 'grad_norm': 4.607779502868652, 'learning_rate': 3.52e-06, 'epoch': 3.24}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0755, 'grad_norm': 10.638628959655762, 'learning_rate': 3.44e-06, 'epoch': 3.28}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1251, 'grad_norm': 5.899642467498779, 'learning_rate': 3.3600000000000004e-06, 'epoch': 3.32}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1512, 'grad_norm': 7.923582553863525, 'learning_rate': 3.2800000000000004e-06, 'epoch': 3.36}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1147, 'grad_norm': 8.2835111618042, 'learning_rate': 3.2000000000000003e-06, 'epoch': 3.4}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0982, 'grad_norm': 6.067707538604736, 'learning_rate': 3.12e-06, 'epoch': 3.44}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1525, 'grad_norm': 6.423046112060547, 'learning_rate': 3.04e-06, 'epoch': 3.48}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1358, 'grad_norm': 8.566346168518066, 'learning_rate': 2.96e-06, 'epoch': 3.52}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0942, 'grad_norm': 8.372005462646484, 'learning_rate': 2.88e-06, 'epoch': 3.56}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1107, 'grad_norm': 5.396286487579346, 'learning_rate': 2.8000000000000003e-06, 'epoch': 3.6}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1001, 'grad_norm': 5.9697957038879395, 'learning_rate': 2.7200000000000002e-06, 'epoch': 3.64}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1364, 'grad_norm': 5.1328959465026855, 'learning_rate': 2.64e-06, 'epoch': 3.68}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1071, 'grad_norm': 4.978933334350586, 'learning_rate': 2.56e-06, 'epoch': 3.72}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1224, 'grad_norm': 7.10433292388916, 'learning_rate': 2.4800000000000004e-06, 'epoch': 3.76}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1042, 'grad_norm': 6.538266181945801, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.8}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1141, 'grad_norm': 5.645888328552246, 'learning_rate': 2.3200000000000002e-06, 'epoch': 3.84}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1071, 'grad_norm': 4.613071918487549, 'learning_rate': 2.24e-06, 'epoch': 3.88}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1002, 'grad_norm': 5.135624408721924, 'learning_rate': 2.16e-06, 'epoch': 3.92}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.116, 'grad_norm': 4.900123119354248, 'learning_rate': 2.08e-06, 'epoch': 3.96}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.1122, 'grad_norm': 6.13119649887085, 'learning_rate': 2.0000000000000003e-06, 'epoch': 4.0}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 1.4347739219665527, 'eval_bleu': 31.0641, 'eval_gen_len': 12.315, 'eval_meteor': 0.5513, 'eval_runtime': 9.7, 'eval_samples_per_second': 20.619, 'eval_steps_per_second': 2.577, 'epoch': 4.0}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0241, 'grad_norm': 7.391202926635742, 'learning_rate': 1.9200000000000003e-06, 'epoch': 4.04}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0172, 'grad_norm': 8.899674415588379, 'learning_rate': 1.8400000000000002e-06, 'epoch': 4.08}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0125, 'grad_norm': 5.948193550109863, 'learning_rate': 1.76e-06, 'epoch': 4.12}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0156, 'grad_norm': 5.218430042266846, 'learning_rate': 1.6800000000000002e-06, 'epoch': 4.16}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0521, 'grad_norm': 6.414409637451172, 'learning_rate': 1.6000000000000001e-06, 'epoch': 4.2}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0142, 'grad_norm': 7.25091552734375, 'learning_rate': 1.52e-06, 'epoch': 4.24}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0319, 'grad_norm': 7.305124759674072, 'learning_rate': 1.44e-06, 'epoch': 4.28}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0228, 'grad_norm': 5.558824062347412, 'learning_rate': 1.3600000000000001e-06, 'epoch': 4.32}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0179, 'grad_norm': 4.7839508056640625, 'learning_rate': 1.28e-06, 'epoch': 4.36}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0109, 'grad_norm': 8.576543807983398, 'learning_rate': 1.2000000000000002e-06, 'epoch': 4.4}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0049, 'grad_norm': 6.895087718963623, 'learning_rate': 1.12e-06, 'epoch': 4.44}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0249, 'grad_norm': 7.541550636291504, 'learning_rate': 1.04e-06, 'epoch': 4.48}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0481, 'grad_norm': 6.233947277069092, 'learning_rate': 9.600000000000001e-07, 'epoch': 4.52}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0538, 'grad_norm': 6.016066551208496, 'learning_rate': 8.8e-07, 'epoch': 4.56}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.038, 'grad_norm': 7.650704383850098, 'learning_rate': 8.000000000000001e-07, 'epoch': 4.6}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0153, 'grad_norm': 7.195834636688232, 'learning_rate': 7.2e-07, 'epoch': 4.64}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0376, 'grad_norm': 9.190327644348145, 'learning_rate': 6.4e-07, 'epoch': 4.68}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0458, 'grad_norm': 7.655123233795166, 'learning_rate': 5.6e-07, 'epoch': 4.72}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0574, 'grad_norm': 6.2976508140563965, 'learning_rate': 4.800000000000001e-07, 'epoch': 4.76}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0409, 'grad_norm': 7.577490329742432, 'learning_rate': 4.0000000000000003e-07, 'epoch': 4.8}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 0.9866, 'grad_norm': 6.318379878997803, 'learning_rate': 3.2e-07, 'epoch': 4.84}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0563, 'grad_norm': 6.711854934692383, 'learning_rate': 2.4000000000000003e-07, 'epoch': 4.88}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0355, 'grad_norm': 7.092369556427002, 'learning_rate': 1.6e-07, 'epoch': 4.92}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.031, 'grad_norm': 5.19961404800415, 'learning_rate': 8e-08, 'epoch': 4.96}


Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


{'loss': 1.0378, 'grad_norm': 7.40623664855957, 'learning_rate': 0.0, 'epoch': 5.0}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 1.4611365795135498, 'eval_bleu': 30.8192, 'eval_gen_len': 12.38, 'eval_meteor': 0.5431, 'eval_runtime': 11.4011, 'eval_samples_per_second': 17.542, 'eval_steps_per_second': 2.193, 'epoch': 5.0}
{'train_runtime': 20884.4541, 'train_samples_per_second': 23.941, 'train_steps_per_second': 2.993, 'train_loss': 1.3045448422851562, 'epoch': 5.0}


TrainOutput(global_step=62500, training_loss=1.3045448422851562, metrics={'train_runtime': 20884.4541, 'train_samples_per_second': 23.941, 'train_steps_per_second': 2.993, 'total_flos': 2.476985304927437e+16, 'train_loss': 1.3045448422851562, 'epoch': 5.0})

In [13]:
trainer.save_model('opus-mt-en-id-finetuned-en-to-id')

Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2}


In [14]:
src_text = [
    "Kelas NLP adalah kelas yang sangat menyenangkan, saya senang belajar NLP. Pak Hendrik merupakan dosen terbaik di program studi IBDA 2020 Calvin Institut of Technology."
]
model_path = "opus-mt-en-id-finetuned-en-to-id"

tokenizer = MBart50TokenizerFast.from_pretrained(model_path, src_lang="en_XX")
model = MBartForConditionalGeneration.from_pretrained(model_path)

model_inputs = tokenizer(src_text, return_tensors="pt")
generated_tokens = model.generate(
    **model_inputs,
    forced_bos_token_id=tokenizer.lang_code_to_id["id_ID"],
    max_new_tokens=360
)
translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

print(src_text)
print(translation)

['Kelas NLP adalah kelas yang sangat menyenangkan, saya senang belajar NLP. Pak Hendrik merupakan dosen terbaik di program studi IBDA 2020 Calvin Institut of Technology.']
['Kelas NLP adalah kelas yang sangat menyenangkan, saya senang belajar NLP. Pak Hendrik merupakan dosen terbaik di program studi IBDA 2020 Calvin Institute of Technology.']
