In [1]:
from datasets import load_dataset, concatenate_datasets
from transformers import AutoTokenizer
from transformers import DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, AutoConfig, GenerationConfig
from transformers.integrations import TensorBoardCallback
import evaluate
import numpy as np
metric = evaluate.load("bleu")
source_lang = "dyu_Latn"
target_lang = "fra_Latn"
checkpoint = "facebook/nllb-200-distilled-600M"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#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
#huggingface-cli login

In [3]:
zindi_ds = load_dataset("uvci/Koumankan_mt_dyu_fr")

In [4]:
import re
import sys
import unicodedata
from sacremoses import MosesPunctNormalizer

mpn = MosesPunctNormalizer(lang="fr")
mpn.substitutions = [
    (re.compile(r), sub) for r, sub in mpn.substitutions
]

def get_non_printing_char_replacer(replace_by: str = " "):
    non_printable_map = {
        ord(c): replace_by
        for c in (chr(i) for i in range(sys.maxunicode + 1))
        # same as \p{C} in perl
        # see https://www.unicode.org/reports/tr44/#General_Category_Values
        if unicodedata.category(c) in {"C", "Cc", "Cf", "Cs", "Co", "Cn"}
    }

    def replace_non_printing_char(line) -> str:
        return line.translate(non_printable_map)

    return replace_non_printing_char

replace_nonprint = get_non_printing_char_replacer(" ")

def preproc(text):
    clean = mpn.normalize(text)
    clean = replace_nonprint(clean)
    # replace 𝓕𝔯𝔞𝔫𝔠𝔢𝔰𝔠𝔞 by Francesca
    clean = unicodedata.normalize("NFKC", clean)
    return clean

def preprocess_function(examples):
    inputs = [preproc(example["dyu"]) for example in examples["translation"]]
    targets = [preproc(example["fr"]) for example in examples["translation"]]
    model_inputs = tokenizer(inputs, text_target=targets, max_length=50, truncation=True, padding="max_length")
    # Check for None values in input_ids and labels
    if None in model_inputs["input_ids"] or None in model_inputs["labels"]:
        print("Warning: None values found in tokenized output")
        # Remove examples with None values
        valid_indices = [i for i, (inp, lab) in enumerate(zip(model_inputs["input_ids"], model_inputs["labels"]))
                         if inp is not None and lab is not None]
        for key in model_inputs.keys():
            model_inputs[key] = [model_inputs[key][i] for i in valid_indices]
    return model_inputs

def postprocess_text(preds, labels):
    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]

    return preds, labels

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    print(result)
    result = {"bleu": result["bleu"]}

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result

In [5]:
# source_lang = "dyu_Latn"
# target_lang = "fra_Latn"

tokenizer = AutoTokenizer.from_pretrained(checkpoint, src_lang=source_lang, tgt_lang=target_lang)
# Apply preprocessing to the dataset
tokenized_zds = zindi_ds.map(
    preprocess_function,
    batched=True,
    remove_columns=zindi_ds["train"].column_names  # Remove original columns
)



In [6]:
concat_ds = concatenate_datasets([tokenized_zds['train'], tokenized_zds['test']])

In [7]:
# ### Max len of 50 is enough
# def length_excluding_terminating_ones(list_of_lists):
#     lengths = []
#     for lst in list_of_lists:
#         # Reverse the list and find the first occurrence of a number not equal to 1
#         index = next((i for i, x in enumerate(reversed(lst)) if x != 1), len(lst))
#         # Calculate the length excluding the trailing 1s
#         lengths.append(len(lst) - index)
#     return lengths
# max(length_excluding_terminating_ones(tokenized_zds['validation']['input_ids']))

In [8]:
# M2M100Config {
#   "_name_or_path": "facebook/nllb-200-distilled-600M",
#   "activation_dropout": 0.0,
#   "activation_function": "relu",
#   "architectures": [
#     "M2M100ForConditionalGeneration"
#   ],
#   "attention_dropout": 0.1,
#   "bos_token_id": 0,
#   "d_model": 1024,
#   "decoder_attention_heads": 16,
#   "decoder_ffn_dim": 4096,
#   "decoder_layerdrop": 0,
#   "decoder_layers": 12,
#   "decoder_start_token_id": 2,
#   "dropout": 0.1,
#   "encoder_attention_heads": 16,
#   "encoder_ffn_dim": 4096,
#   "encoder_layerdrop": 0,
#   "encoder_layers": 12,
#   "eos_token_id": 2,
#   "init_std": 0.02,
#   "is_encoder_decoder": true,
#   "max_length": 200,
#   "max_position_embeddings": 1024,
#   "model_type": "m2m_100",
#   "num_hidden_layers": 12,
#   "pad_token_id": 1,
#   "scale_embedding": true,
#   "tokenizer_class": "NllbTokenizer",
#   "torch_dtype": "float32",
#   "transformers_version": "4.45.0.dev0",
#   "use_cache": true,
#   "vocab_size": 256206
# }



In [9]:
!rm -rf models/nllb/nllb_output/base_model

In [10]:
import gc
# GenerationConfig {
#   "bos_token_id": 0,
#   "decoder_start_token_id": 2,
#   "eos_token_id": 2,
#   "max_length": 200,
#   "pad_token_id": 1
# }
# Create a GenerationConfig object

#Load model and config
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
config = AutoConfig.from_pretrained(checkpoint)

#Save model,config and tokenizer
model.save_pretrained('models/nllb/nllb_output/base_model')
tokenizer.save_pretrained('models/nllb/nllb_output/base_model')

#Update config
config.dropout=0.5
config.max_length=50
config.save_pretrained('models/nllb/nllb_output/base_model')

#Update generation config
generation_config = GenerationConfig(
  bos_token_id= 0,
  decoder_start_token_id= 2,
  eos_token_id= 2,
  max_length= 50,
  pad_token_id= 1
)
generation_config.save_pretrained('models/nllb/nllb_output/base_model')
del model
gc.collect()

#Reload model
model = AutoModelForSeq2SeqLM.from_pretrained('models/nllb/nllb_output/base_model')
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model='models/nllb/nllb_output/base_model')

Non-default generation parameters: {'max_length': 200}
Non-default generation parameters: {'max_length': 50}


In [11]:
import gc, torch
gc.collect()
torch.cuda.empty_cache()
torch.cuda.synchronize()

In [12]:

training_args = Seq2SeqTrainingArguments(
    output_dir="models/nllb/nllb_output",
    eval_strategy="epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=70,
    per_device_eval_batch_size=70,
    # weight_decay=0.01,
    num_train_epochs=200000,
    predict_with_generate=True,
    fp16=True,
    # push_to_hub=False,
    do_train=True,
    do_eval=True,
    gradient_accumulation_steps=20,
    logging_dir= "models/nllb/nllb_output/logs",
    logging_steps = 1,
    save_strategy = 'epoch',
    save_steps = 2,
    save_total_limit = 3,
    seed = 42,
    dataloader_drop_last = False,
    eval_steps = 1,
    # label_smoothing_factor: float = 0.0,
    optim = 'adafactor',
    # resume_from_checkpoint: Optional[str] = None,
    # fp16_backend: str = 'auto',
    # batch_eval_metrics: bool = False,
    # eval_on_start=True,
    # generation_max_length= 50,
    generation_num_beams=2,
    generation_config = "models/nllb/nllb_output/base_model/generation_config.json",
    run_name="Test2"
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=concat_ds,
    eval_dataset=tokenized_zds["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Bleu,Gen Len
0,10.5365,9.690979,0.0576,12.3746
1,9.2121,7.891768,0.0573,11.4276
2,7.8964,6.464151,0.0455,9.4269
3,7.0203,5.721262,0.0341,8.2135
5,5.8466,4.846464,0.0261,7.6071
6,5.3175,4.365462,0.0281,7.9205
7,4.8011,3.951409,0.0285,7.8613
8,4.3031,3.470033,0.0304,8.4997
10,3.3853,2.663025,0.0311,8.5656
11,2.8791,2.221287,0.0355,9.3141


{'bleu': 0.057645593149918885, 'precisions': [0.2729656146011899, 0.06890835898650248, 0.03468539486885481, 0.019335019877123237], 'brevity_penalty': 0.9672713696947787, 'length_ratio': 0.9677954523275105, 'translation_length': 9917, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.05733209964375646, 'precisions': [0.2946607341490545, 0.07594094959436096, 0.03923508077810749, 0.021528525296017224], 'brevity_penalty': 0.8695129736594928, 'length_ratio': 0.8773299502293355, 'translation_length': 8990, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.045504946883814665, 'precisions': [0.29267247536066276, 0.08625678119349005, 0.045536519386834985, 0.023830801310694073], 'brevity_penalty': 0.6289848865715642, 'length_ratio': 0.683224358348785, 'translation_length': 7001, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.03407819978171915, 'precisions': [0.2843818890961506, 0.08834161771351107, 0.045312942509204195, 0.022650056625141562], 'brevity_penalty': 0.4782301301181514, 'length_ratio': 0.5754855079535474, 'translation_length': 5897, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.02952940155705326, 'precisions': [0.27087164398675984, 0.08646332240988153, 0.04534005037783375, 0.024614100959532748], 'brevity_penalty': 0.4129897622064601, 'length_ratio': 0.5306919098272666, 'translation_length': 5438, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.02611751026320297, 'precisions': [0.267962308598351, 0.08860060723157605, 0.046447140381282497, 0.024129930394431554], 'brevity_penalty': 0.3636431484439342, 'length_ratio': 0.49712110861715625, 'translation_length': 5094, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.02806856549921687, 'precisions': [0.2651255192342424, 0.0794392523364486, 0.041100123609394315, 0.021541010770505385], 'brevity_penalty': 0.4271409863639252, 'length_ratio': 0.5403532741290134, 'translation_length': 5537, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.028543585801113113, 'precisions': [0.269526128488481, 0.08660806618407446, 0.044279250161603105, 0.025387263339070567], 'brevity_penalty': 0.39880876123218545, 'length_ratio': 0.5210305455255196, 'translation_length': 5339, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.030399079975663344, 'precisions': [0.26675470452294486, 0.07325049051667756, 0.03545132260703572, 0.019593613933236574], 'brevity_penalty': 0.5008331148878107, 'length_ratio': 0.5911974236361862, 'translation_length': 6058, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.03067076700675998, 'precisions': [0.2733181099595995, 0.08147797252486973, 0.041592394533571005, 0.023431294678316124], 'brevity_penalty': 0.4493605358544287, 'length_ratio': 0.5555772421196448, 'translation_length': 5693, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.0310812542080477, 'precisions': [0.2652052085050272, 0.07615317667536989, 0.037267080745341616, 0.019510464703795673], 'brevity_penalty': 0.5020913853419345, 'length_ratio': 0.5920757294817995, 'translation_length': 6067, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.035520184429664904, 'precisions': [0.273720529039678, 0.07037374658158614, 0.032353611300979725, 0.01694915254237288], 'brevity_penalty': 0.623057335110036, 'length_ratio': 0.6788328291207183, 'translation_length': 6956, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.03269088242048375, 'precisions': [0.27240432746649446, 0.0770859805167302, 0.038228315317690484, 0.01951219512195122], 'brevity_penalty': 0.519644661844896, 'length_ratio': 0.6043720113203864, 'translation_length': 6193, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.03815326684524284, 'precisions': [0.28333811299110984, 0.0743230964928221, 0.0365097588978186, 0.018012422360248446], 'brevity_penalty': 0.6254307935050519, 'length_ratio': 0.680589440811945, 'translation_length': 6974, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.0391950352729957, 'precisions': [0.2923412204234122, 0.08641227538865334, 0.04314720812182741, 0.023405698778833108], 'brevity_penalty': 0.5515007503644941, 'length_ratio': 0.6269151946911291, 'translation_length': 6424, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.04517277619961068, 'precisions': [0.29478491455509725, 0.0882076358848975, 0.046348314606741575, 0.02652683528685996], 'brevity_penalty': 0.6007504100140869, 'length_ratio': 0.662437786669269, 'translation_length': 6788, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.043599282752026944, 'precisions': [0.3166992824527071, 0.10319673889723235, 0.05293631100082713, 0.030592734225621414], 'brevity_penalty': 0.5111614457974067, 'length_ratio': 0.5984190494778959, 'translation_length': 6132, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.05791629123124088, 'precisions': [0.3035212059849252, 0.0798058775950391, 0.03941874060464339, 0.021710526315789475], 'brevity_penalty': 0.8583244618312809, 'length_ratio': 0.8674734068507856, 'translation_length': 8889, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.05386355828475383, 'precisions': [0.333583546014112, 0.11175337186897881, 0.056520675311964766, 0.03441363180755095], 'brevity_penalty': 0.5837061421959432, 'length_ratio': 0.6500439152922807, 'translation_length': 6661, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.06473743749267959, 'precisions': [0.32212475633528265, 0.09841175597446934, 0.05077262693156733, 0.0294757187726504], 'brevity_penalty': 0.7800352411658801, 'length_ratio': 0.8010149311993754, 'translation_length': 8208, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.06275326955307722, 'precisions': [0.3258987142482288, 0.10713705088603478, 0.05530973451327434, 0.031842105263157894], 'brevity_penalty': 0.7086469751446427, 'length_ratio': 0.7438274616961061, 'translation_length': 7622, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.06917463618610861, 'precisions': [0.33800127307447486, 0.11105889724310777, 0.059089124460149196, 0.034898976646549465], 'brevity_penalty': 0.7374777104188729, 'length_ratio': 0.7665658241436518, 'translation_length': 7855, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.0618878619906551, 'precisions': [0.34681372549019607, 0.1342693296420803, 0.07197436529455263, 0.04274061990212072], 'brevity_penalty': 0.5656952604331188, 'length_ratio': 0.6370645066848833, 'translation_length': 6528, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.07210660410977715, 'precisions': [0.33580338396937137, 0.11349230304859644, 0.05985915492957746, 0.03427612655800575], 'brevity_penalty': 0.7667990985784829, 'length_ratio': 0.7901824924368108, 'translation_length': 8097, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.07574032008403744, 'precisions': [0.34256985852917105, 0.1112680033888732, 0.058330422633601116, 0.03268571428571428], 'brevity_penalty': 0.8203213210593847, 'length_ratio': 0.8346833219478872, 'translation_length': 8553, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.07922130865959214, 'precisions': [0.34768703597944034, 0.11161449752883032, 0.05847255369928401, 0.034320323014804845], 'brevity_penalty': 0.8433131621398902, 'length_ratio': 0.8543964087049868, 'translation_length': 8755, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.07151934297367314, 'precisions': [0.349742337944128, 0.1284092834152126, 0.06886477462437396, 0.040195545898967955], 'brevity_penalty': 0.6773195213956218, 'length_ratio': 0.7196252561725383, 'translation_length': 7374, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.06923049944405946, 'precisions': [0.3593567251461988, 0.13801452784503632, 0.07547169811320754, 0.04500310366232154], 'brevity_penalty': 0.607684518818583, 'length_ratio': 0.6675124426661462, 'translation_length': 6840, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.07733635527855888, 'precisions': [0.34650602409638553, 0.11934397422755894, 0.0627923713566031, 0.03520634180461646], 'brevity_penalty': 0.7909042931780006, 'length_ratio': 0.8099931687323119, 'translation_length': 8300, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.07593448223228598, 'precisions': [0.35019455252918286, 0.1285462413848373, 0.06968022108172128, 0.039527720739219716], 'brevity_penalty': 0.7196047488103867, 'length_ratio': 0.7524153410754367, 'translation_length': 7710, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.07915738706739822, 'precisions': [0.3577674897119342, 0.13322759714512292, 0.07211444248481286, 0.04071702944942381], 'brevity_penalty': 0.7277682395441188, 'length_ratio': 0.7588562506099347, 'translation_length': 7776, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.07379787026113221, 'precisions': [0.3584985238296078, 0.14179369018078697, 0.07712418300653595, 0.04408024865781294], 'brevity_penalty': 0.6436490111658252, 'length_ratio': 0.6941543866497512, 'translation_length': 7113, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.07992556913797787, 'precisions': [0.3672361276652099, 0.14226973684210525, 0.07660961695191525, 0.0425247392350896], 'brevity_penalty': 0.6997453451238372, 'length_ratio': 0.7368986044696009, 'translation_length': 7551, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08053417277764252, 'precisions': [0.3595046597727563, 0.1364350833071361, 0.07285409804301492, 0.04038364462392731], 'brevity_penalty': 0.7347806005742945, 'length_ratio': 0.7644188542988192, 'translation_length': 7833, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.0787401679561052, 'precisions': [0.36072998534700945, 0.1418157720344599, 0.07714227559535924, 0.041941440253231335], 'brevity_penalty': 0.6942017369250901, 'length_ratio': 0.7326046647799356, 'translation_length': 7507, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08474885487063921, 'precisions': [0.3553815589575786, 0.12785774767146485, 0.06844106463878327, 0.036550033429908625], 'brevity_penalty': 0.8207808600216423, 'length_ratio': 0.8350736801014931, 'translation_length': 8557, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.0897011414884359, 'precisions': [0.3524446393334795, 0.12508168866814795, 0.06682615629984051, 0.0359918200408998], 'brevity_penalty': 0.8839734793797507, 'length_ratio': 0.8902117692983312, 'translation_length': 9122, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08481424168572677, 'precisions': [0.36846835280523815, 0.14371636593858816, 0.07913247362250879, 0.04363357999489666], 'brevity_penalty': 0.7293706468275825, 'length_ratio': 0.7601249146091539, 'translation_length': 7789, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08325391607709678, 'precisions': [0.3688710754843019, 0.14931825739940138, 0.08230706075533661, 0.046368265880461], 'brevity_penalty': 0.6914221773397159, 'length_ratio': 0.7304576949351029, 'translation_length': 7485, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.0852562385004187, 'precisions': [0.3664604279453467, 0.14426594560203596, 0.07982045277127245, 0.04517920242301868], 'brevity_penalty': 0.7255465029965317, 'length_ratio': 0.7570996389187079, 'translation_length': 7758, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08845193048675255, 'precisions': [0.36069013756120305, 0.13113831433797665, 0.07140381282495667, 0.03946774921064502], 'brevity_penalty': 0.8231906124543864, 'length_ratio': 0.8371230604079243, 'translation_length': 8578, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.0906019940689669, 'precisions': [0.3625648279113626, 0.13646085840581776, 0.07556410704915165, 0.041383989145183174], 'brevity_penalty': 0.8123671196905289, 'length_ratio': 0.8279496437981848, 'translation_length': 8484, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.0886323751684759, 'precisions': [0.3631526849789656, 0.14218726365148995, 0.07878112225938314, 0.04429465575349061], 'brevity_penalty': 0.7650001586175481, 'length_ratio': 0.7887186493607885, 'translation_length': 8082, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08555982674637644, 'precisions': [0.3743751644304131, 0.148915348230305, 0.0822554567502021, 0.04699946893255443], 'brevity_penalty': 0.7061449478360945, 'length_ratio': 0.7418756709280765, 'translation_length': 7602, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08500131974677327, 'precisions': [0.3648546144121365, 0.14070507842832738, 0.07695238095238095, 0.043082225504677496], 'brevity_penalty': 0.7441975174132159, 'length_ratio': 0.7719332487557334, 'translation_length': 7910, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08851838777941698, 'precisions': [0.3633205558217537, 0.13799621928166353, 0.07481784254487293, 0.040657834627683874], 'brevity_penalty': 0.7965386421719536, 'length_ratio': 0.814677466575583, 'translation_length': 8348, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08538355907456147, 'precisions': [0.3710480290595991, 0.1528010734652801, 0.0867135761589404, 0.04914933837429111], 'brevity_penalty': 0.6848318130967839, 'length_ratio': 0.7253830389382259, 'translation_length': 7433, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08337792558238961, 'precisions': [0.3695413347981324, 0.15350197900533472, 0.08666243923060664, 0.050109529025191675], 'brevity_penalty': 0.6655321316523273, 'length_ratio': 0.7106470186396018, 'translation_length': 7282, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.0900590098052643, 'precisions': [0.3678885979528684, 0.1383638724570769, 0.07503556187766713, 0.04145437702640111], 'brevity_penalty': 0.8028474375585896, 'length_ratio': 0.8199473016492632, 'translation_length': 8402, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.09351974132390083, 'precisions': [0.3690922730682671, 0.14922629079209437, 0.08625235404896422, 0.0495847581827064], 'brevity_penalty': 0.7548808937317376, 'length_ratio': 0.7805211281350639, 'translation_length': 7998, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.088991135075202, 'precisions': [0.36735725113693785, 0.14678044996121023, 0.0820483533219113, 0.04603643525356967], 'brevity_penalty': 0.7449286012258527, 'length_ratio': 0.7725187859861423, 'translation_length': 7916, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08986002156069303, 'precisions': [0.3678625568468924, 0.14724592707525214, 0.08319088319088319, 0.04698972099853157], 'brevity_penalty': 0.7449286012258527, 'length_ratio': 0.7725187859861423, 'translation_length': 7916, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08719019625734414, 'precisions': [0.37685211070729663, 0.1617103642442372, 0.09384482011270048, 0.05696559503666103], 'brevity_penalty': 0.6489850870092793, 'length_ratio': 0.698155557724212, 'translation_length': 7154, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.08934683632685171, 'precisions': [0.38012489818083084, 0.1594571670907549, 0.09291107382550336, 0.05409115870400879], 'brevity_penalty': 0.6762980698098279, 'length_ratio': 0.7188445398653265, 'translation_length': 7366, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.09749658788248017, 'precisions': [0.36780432309442546, 0.13949993168465638, 0.07809619574325456, 0.043760831889081454], 'brevity_penalty': 0.8472524798793997, 'length_ratio': 0.8578120425490388, 'translation_length': 8790, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.09878372541727716, 'precisions': [0.3766992606725495, 0.1477946493130875, 0.08459483526268922, 0.04911955514365153], 'brevity_penalty': 0.8009814573825962, 'length_ratio': 0.8183858690348395, 'translation_length': 8386, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.09249487680869735, 'precisions': [0.37433085259172216, 0.15530058177117, 0.09015421115065243, 0.05395775006362942], 'brevity_penalty': 0.7132643852397524, 'length_ratio': 0.7474382746169611, 'translation_length': 7659, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.0937139531201314, 'precisions': [0.37520391517128876, 0.1512773160972607, 0.08656942602765676, 0.04924895345973898], 'brevity_penalty': 0.7513695292892014, 'length_ratio': 0.7776910315214209, 'translation_length': 7969, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.0938102869849332, 'precisions': [0.37017931712110047, 0.14645480437715486, 0.08368507983116168, 0.048013245033112585], 'brevity_penalty': 0.7721812426464494, 'length_ratio': 0.7945740216648776, 'translation_length': 8142, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.09512478442317684, 'precisions': [0.3789984268484531, 0.16111742731849926, 0.09476209516193522, 0.055873180873180876], 'brevity_penalty': 0.7093967449430397, 'length_ratio': 0.744412998926515, 'translation_length': 7628, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.09207326870052732, 'precisions': [0.3786488740617181, 0.16739472304735278, 0.10047434238896076, 0.061620709060213844], 'brevity_penalty': 0.6541743132819424, 'length_ratio': 0.7020591392602713, 'translation_length': 7194, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.09810756541983205, 'precisions': [0.3743836291913215, 0.1519349495557898, 0.08886430678466077, 0.052519011406844104], 'brevity_penalty': 0.768595592511042, 'length_ratio': 0.791646335512833, 'translation_length': 8112, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10127049208668491, 'precisions': [0.3830232844647037, 0.15753836894372555, 0.09210770659238626, 0.05406060606060606], 'brevity_penalty': 0.7691938802718356, 'length_ratio': 0.7921342832048405, 'translation_length': 8117, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.09960928282325213, 'precisions': [0.38368731934146033, 0.15911193339500462, 0.0928639391056137, 0.05490683229813664], 'brevity_penalty': 0.7499138779969634, 'length_ratio': 0.7765199570606031, 'translation_length': 7957, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10041981195518289, 'precisions': [0.38318227593152066, 0.16020392399196662, 0.09465881011214598, 0.05580246913580247], 'brevity_penalty': 0.7483351571448997, 'length_ratio': 0.7752512930613839, 'translation_length': 7944, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10379781936386946, 'precisions': [0.3802495800335973, 0.15445140609063093, 0.09053497942386832, 0.05468025949953661], 'brevity_penalty': 0.7948978710492104, 'length_ratio': 0.8133112130379623, 'translation_length': 8334, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10559036754559294, 'precisions': [0.38341601700921335, 0.15625446747676913, 0.09151589671526436, 0.05259562841530055], 'brevity_penalty': 0.8102836597921959, 'length_ratio': 0.8261930321069582, 'translation_length': 8466, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10674833659587767, 'precisions': [0.3770830938972532, 0.1511756569847856, 0.08914073071718538, 0.05201305767138194], 'brevity_penalty': 0.8372096798100793, 'length_ratio': 0.8491265736313067, 'translation_length': 8701, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10185313554261631, 'precisions': [0.3837541982833686, 0.16184531059683313, 0.09432902863559797, 0.05511046370478272], 'brevity_penalty': 0.7598296409366053, 'length_ratio': 0.7845222992095248, 'translation_length': 8039, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10300958844469871, 'precisions': [0.3820031874463651, 0.15883936583906672, 0.09354897996691784, 0.05527757922325471], 'brevity_penalty': 0.7739703991496236, 'length_ratio': 0.7960378647408998, 'translation_length': 8157, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1088217663670128, 'precisions': [0.3803394464061966, 0.15229885057471265, 0.08973281801377919, 0.052665941240478784], 'brevity_penalty': 0.8460158173509815, 'length_ratio': 0.8567385576266224, 'translation_length': 8779, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10580479980107553, 'precisions': [0.37759044608359676, 0.15572842998585573, 0.0904071773636991, 0.052410437859354266], 'brevity_penalty': 0.8189416697920899, 'length_ratio': 0.8335122474870694, 'translation_length': 8541, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10532082692667238, 'precisions': [0.3880911116613023, 0.164976540033298, 0.09761549925484352, 0.0576271186440678], 'brevity_penalty': 0.7645200281830439, 'length_ratio': 0.7883282912071826, 'translation_length': 8078, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10813134402334984, 'precisions': [0.38494778537990637, 0.1606413994169096, 0.09644396551724138, 0.05751695113397241], 'brevity_penalty': 0.7945460007957612, 'length_ratio': 0.8130184444227578, 'translation_length': 8331, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10411110938001203, 'precisions': [0.38406600172392563, 0.1613533834586466, 0.09544868251335913, 0.056599286563614744], 'brevity_penalty': 0.7696723148067361, 'length_ratio': 0.7925246413584464, 'translation_length': 8121, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1060908240075824, 'precisions': [0.3937219730941704, 0.1733501736659299, 0.10420724581223217, 0.062261753494282084], 'brevity_penalty': 0.7313403289418007, 'length_ratio': 0.7616863472235776, 'translation_length': 7805, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10798139801552215, 'precisions': [0.38215242269296323, 0.16387364674477237, 0.09866039102099927, 0.0592196934509986], 'brevity_penalty': 0.780746894027445, 'length_ratio': 0.8016004684297843, 'translation_length': 8214, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.09878104174255575, 'precisions': [0.3916317991631799, 0.18248815581680997, 0.11053428509625784, 0.06708004509582864], 'brevity_penalty': 0.6510627619371754, 'length_ratio': 0.6997169903386357, 'translation_length': 7170, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10918224900186738, 'precisions': [0.3863527724665392, 0.16267942583732056, 0.09691786923213967, 0.057274826789838335], 'brevity_penalty': 0.7988789160232311, 'length_ratio': 0.8166292573436128, 'translation_length': 8368, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11122463736191436, 'precisions': [0.38298115362649915, 0.15595826468973092, 0.09238304340483026, 0.05483658697082693], 'brevity_penalty': 0.8433131621398902, 'length_ratio': 0.8543964087049868, 'translation_length': 8755, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11080784448753046, 'precisions': [0.3862928348909657, 0.16392727272727273, 0.09859913793103449, 0.0600516068496364], 'brevity_penalty': 0.7963043763734565, 'length_ratio': 0.8144822874987802, 'translation_length': 8346, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11187900095100771, 'precisions': [0.3814827706230421, 0.158505875769446, 0.09524626737600823, 0.05790991790548036], 'brevity_penalty': 0.8278816898471276, 'length_ratio': 0.8411242314823851, 'translation_length': 8619, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11461372398146435, 'precisions': [0.3851445895522388, 0.16312456016889515, 0.09851877368239752, 0.06078083407275954], 'brevity_penalty': 0.8229613167945045, 'length_ratio': 0.8369278813311213, 'translation_length': 8576, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.10686017844628595, 'precisions': [0.3935841278021129, 0.17421713559052615, 0.106158357771261, 0.06446700507614213], 'brevity_penalty': 0.726040525099394, 'length_ratio': 0.7574899970723139, 'translation_length': 7762, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11253886470151253, 'precisions': [0.38890863846427304, 0.16504018369690013, 0.09871093060215433, 0.05964670796054141], 'brevity_penalty': 0.8071519053698734, 'length_ratio': 0.8235581145701181, 'translation_length': 8439, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11658970029283854, 'precisions': [0.38559273964832674, 0.16053921568627452, 0.09688755020080321, 0.05900216919739696], 'brevity_penalty': 0.8500582786846294, 'length_ratio': 0.8602517810090758, 'translation_length': 8815, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1088820078414461, 'precisions': [0.3877424935129124, 0.16898218061008757, 0.10174203113417346, 0.06113641812514985], 'brevity_penalty': 0.7663196204247827, 'length_ratio': 0.7897921342832048, 'translation_length': 8093, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.112616843373262, 'precisions': [0.390022949631598, 0.16818448883666276, 0.10184182015167931, 0.062309674396814244], 'brevity_penalty': 0.7884314068249358, 'length_ratio': 0.8079437884258808, 'translation_length': 8279, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11409525576895228, 'precisions': [0.3869458416189028, 0.1637468211359141, 0.09918501820704005, 0.05968140004487323], 'brevity_penalty': 0.8198616097168412, 'length_ratio': 0.8342929637942813, 'translation_length': 8549, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1142047064948429, 'precisions': [0.3835569315445476, 0.15986206896551725, 0.0960986319878399, 0.0581319399085565], 'brevity_penalty': 0.8394738659076754, 'length_ratio': 0.8510783643993364, 'translation_length': 8721, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11586849503687806, 'precisions': [0.38183243727598565, 0.15515622904653345, 0.09430867639822864, 0.05825242718446602], 'brevity_penalty': 0.8626575714752966, 'length_ratio': 0.8712793988484434, 'translation_length': 8928, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11504070191665977, 'precisions': [0.3841196777905639, 0.16082559911345062, 0.09753610875106203, 0.059521194816604436], 'brevity_penalty': 0.8359625517568677, 'length_ratio': 0.8480530887088904, 'translation_length': 8690, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11165284109344412, 'precisions': [0.39077000250815147, 0.17468860525911117, 0.10721688336159789, 0.06640720019459985], 'brevity_penalty': 0.7519755889171007, 'length_ratio': 0.7781789792134283, 'translation_length': 7974, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1136886861240189, 'precisions': [0.3939957821610222, 0.17435508345978756, 0.10779559865721745, 0.0667472793228537], 'brevity_penalty': 0.7624775333034997, 'length_ratio': 0.7866692690543574, 'translation_length': 8061, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11237970431532147, 'precisions': [0.3920516884066805, 0.1691919191919192, 0.10286862780924538, 0.06333017077798861], 'brevity_penalty': 0.779441898364968, 'length_ratio': 0.8005269835073681, 'translation_length': 8203, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11520512188025579, 'precisions': [0.3887459054749649, 0.1646177758937403, 0.09939183318853172, 0.06133032694475761], 'brevity_penalty': 0.819746654943882, 'length_ratio': 0.8341953742558798, 'translation_length': 8548, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11717123803457384, 'precisions': [0.38503774879890185, 0.16311373951313438, 0.09915966386554621, 0.06025917926565875], 'brevity_penalty': 0.8418466551900603, 'length_ratio': 0.8531277447057676, 'translation_length': 8742, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11615682693747874, 'precisions': [0.39287840895557935, 0.17008374241986718, 0.10363153232949512, 0.06345933562428407], 'brevity_penalty': 0.8022646164782152, 'length_ratio': 0.8194593539572558, 'translation_length': 8397, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.117612679756933, 'precisions': [0.39072771306654364, 0.16555555555555557, 0.10040983606557377, 0.06094858156028369], 'brevity_penalty': 0.8338053674608543, 'length_ratio': 0.8461988874792622, 'translation_length': 8671, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11402205661969782, 'precisions': [0.39061371841155235, 0.16888433981576254, 0.10347309699478136, 0.06290926099158092], 'brevity_penalty': 0.7920801764132083, 'length_ratio': 0.8109690641163267, 'translation_length': 8310, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11724345873815703, 'precisions': [0.38787528868360277, 0.16511336764501322, 0.0997778916794806, 0.06154527341155634], 'brevity_penalty': 0.832554702115602, 'length_ratio': 0.8451254025568459, 'translation_length': 8660, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11720472900885213, 'precisions': [0.3902553291168892, 0.16626574218197254, 0.10190972222222222, 0.06355266112732989], 'brevity_penalty': 0.8185965144875945, 'length_ratio': 0.833219478871865, 'translation_length': 8538, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11454852730678747, 'precisions': [0.40525084481414086, 0.18656596496866462, 0.11792828685258964, 0.07281299109481404], 'brevity_penalty': 0.7176186348626853, 'length_ratio': 0.750853908461013, 'translation_length': 7694, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11894424447368968, 'precisions': [0.3820348708894284, 0.15900408378342773, 0.09595878280470134, 0.05793503000206911], 'brevity_penalty': 0.8774232118306676, 'length_ratio': 0.8843563969942422, 'translation_length': 9062, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11517490471776418, 'precisions': [0.3848761103319308, 0.16654904728299225, 0.10063760124073755, 0.06013707716117621], 'brevity_penalty': 0.8206659914403701, 'length_ratio': 0.8349760905630916, 'translation_length': 8556, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11590113475901385, 'precisions': [0.38767348859091977, 0.16711705305077515, 0.10210471386328057, 0.061996418979409135], 'brevity_penalty': 0.8144470807449296, 'length_ratio': 0.8297062554894116, 'translation_length': 8502, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1159087151289269, 'precisions': [0.39316239316239315, 0.17159157401989467, 0.10572766570605187, 0.06440281030444965], 'brevity_penalty': 0.7917275253444581, 'length_ratio': 0.8106762955011223, 'translation_length': 8307, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11282466487602177, 'precisions': [0.39038817005545284, 0.1736905478627333, 0.10639867232159321, 0.06423982869379015], 'brevity_penalty': 0.768954597780012, 'length_ratio': 0.7919391041280375, 'translation_length': 8115, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


KeyboardInterrupt: 

In [None]:
# trainer.save_model("/root/zindi/models/nllb/nllb_output/checkpoint-300")

Non-default generation parameters: {'max_length': 100}
