In [1]:
from datasets import load_dataset, concatenate_datasets
from transformers import AutoTokenizer
from transformers import DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, AutoConfig, GenerationConfig
from transformers.integrations import TensorBoardCallback
import evaluate
import numpy as np
metric = evaluate.load("bleu")
source_lang = "dyu_Latn"
target_lang = "fra_Latn"
checkpoint = "/root/zindi/models/nllb/nllb_output/checkpoint-108"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#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
#huggingface-cli login

In [3]:
zindi_ds = load_dataset("uvci/Koumankan_mt_dyu_fr")

In [4]:
import re
import sys
import unicodedata
from sacremoses import MosesPunctNormalizer

mpn = MosesPunctNormalizer(lang="fr")
mpn.substitutions = [
    (re.compile(r), sub) for r, sub in mpn.substitutions
]

def get_non_printing_char_replacer(replace_by: str = " "):
    non_printable_map = {
        ord(c): replace_by
        for c in (chr(i) for i in range(sys.maxunicode + 1))
        # same as \p{C} in perl
        # see https://www.unicode.org/reports/tr44/#General_Category_Values
        if unicodedata.category(c) in {"C", "Cc", "Cf", "Cs", "Co", "Cn"}
    }

    def replace_non_printing_char(line) -> str:
        return line.translate(non_printable_map)

    return replace_non_printing_char

replace_nonprint = get_non_printing_char_replacer(" ")

def preproc(text):
    clean = mpn.normalize(text)
    clean = replace_nonprint(clean)
    # replace 𝓕𝔯𝔞𝔫𝔠𝔢𝔰𝔠𝔞 by Francesca
    clean = unicodedata.normalize("NFKC", clean)
    return clean

def preprocess_function(examples):
    inputs = [preproc(example["dyu"]) for example in examples["translation"]]
    targets = [preproc(example["fr"]) for example in examples["translation"]]
    model_inputs = tokenizer(inputs, text_target=targets, max_length=50, truncation=True, padding="max_length")
    # Check for None values in input_ids and labels
    if None in model_inputs["input_ids"] or None in model_inputs["labels"]:
        print("Warning: None values found in tokenized output")
        # Remove examples with None values
        valid_indices = [i for i, (inp, lab) in enumerate(zip(model_inputs["input_ids"], model_inputs["labels"]))
                         if inp is not None and lab is not None]
        for key in model_inputs.keys():
            model_inputs[key] = [model_inputs[key][i] for i in valid_indices]
    return model_inputs

def postprocess_text(preds, labels):
    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]

    return preds, labels

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    print(result)
    result = {"bleu": result["bleu"]}

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result

In [5]:
# source_lang = "dyu_Latn"
# target_lang = "fra_Latn"

tokenizer = AutoTokenizer.from_pretrained(checkpoint, src_lang=source_lang, tgt_lang=target_lang)
# Apply preprocessing to the dataset
tokenized_zds = zindi_ds.map(
    preprocess_function,
    batched=True,
    remove_columns=zindi_ds["train"].column_names  # Remove original columns
)

Map: 100%|██████████| 8065/8065 [00:00<00:00, 8337.47 examples/s]
Map: 100%|██████████| 1471/1471 [00:00<00:00, 6094.82 examples/s]
Map: 100%|██████████| 1393/1393 [00:00<00:00, 8935.73 examples/s]


In [6]:
concat_ds = concatenate_datasets([tokenized_zds['train'], tokenized_zds['test']])

In [7]:
# ### Max len of 50 is enough
# def length_excluding_terminating_ones(list_of_lists):
#     lengths = []
#     for lst in list_of_lists:
#         # Reverse the list and find the first occurrence of a number not equal to 1
#         index = next((i for i, x in enumerate(reversed(lst)) if x != 1), len(lst))
#         # Calculate the length excluding the trailing 1s
#         lengths.append(len(lst) - index)
#     return lengths
# max(length_excluding_terminating_ones(tokenized_zds['validation']['input_ids']))

In [8]:
# M2M100Config {
#   "_name_or_path": "facebook/nllb-200-distilled-600M",
#   "activation_dropout": 0.0,
#   "activation_function": "relu",
#   "architectures": [
#     "M2M100ForConditionalGeneration"
#   ],
#   "attention_dropout": 0.1,
#   "bos_token_id": 0,
#   "d_model": 1024,
#   "decoder_attention_heads": 16,
#   "decoder_ffn_dim": 4096,
#   "decoder_layerdrop": 0,
#   "decoder_layers": 12,
#   "decoder_start_token_id": 2,
#   "dropout": 0.1,
#   "encoder_attention_heads": 16,
#   "encoder_ffn_dim": 4096,
#   "encoder_layerdrop": 0,
#   "encoder_layers": 12,
#   "eos_token_id": 2,
#   "init_std": 0.02,
#   "is_encoder_decoder": true,
#   "max_length": 200,
#   "max_position_embeddings": 1024,
#   "model_type": "m2m_100",
#   "num_hidden_layers": 12,
#   "pad_token_id": 1,
#   "scale_embedding": true,
#   "tokenizer_class": "NllbTokenizer",
#   "torch_dtype": "float32",
#   "transformers_version": "4.45.0.dev0",
#   "use_cache": true,
#   "vocab_size": 256206
# }



In [9]:
!rm -rf models/nllb/nllb_output/base_model

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [10]:
import gc
# GenerationConfig {
#   "bos_token_id": 0,
#   "decoder_start_token_id": 2,
#   "eos_token_id": 2,
#   "max_length": 200,
#   "pad_token_id": 1
# }
# Create a GenerationConfig object

#Load model and config
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
config = AutoConfig.from_pretrained(checkpoint)

#Save model,config and tokenizer
model.save_pretrained('models/nllb/nllb_output/base_model')
tokenizer.save_pretrained('models/nllb/nllb_output/base_model')

#Update config
config.dropout=0.5
config.max_length=50
config.save_pretrained('models/nllb/nllb_output/base_model')

#Update generation config
generation_config = GenerationConfig(
  bos_token_id= 0,
  decoder_start_token_id= 2,
  eos_token_id= 2,
  max_length= 50,
  pad_token_id= 1
)
generation_config.save_pretrained('models/nllb/nllb_output/base_model')
del model
gc.collect()

#Reload model
model = AutoModelForSeq2SeqLM.from_pretrained('models/nllb/nllb_output/base_model')
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model='models/nllb/nllb_output/base_model')

Non-default generation parameters: {'max_length': 50}
Non-default generation parameters: {'max_length': 50}


In [11]:
import gc, torch
gc.collect()
torch.cuda.empty_cache()
torch.cuda.synchronize()

In [12]:

training_args = Seq2SeqTrainingArguments(
    output_dir="models/nllb/nllb_output",
    eval_strategy="epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=70,
    per_device_eval_batch_size=70,
    # weight_decay=0.01,
    num_train_epochs=200000,
    predict_with_generate=True,
    fp16=True,
    # push_to_hub=False,
    do_train=True,
    do_eval=True,
    gradient_accumulation_steps=20,
    logging_dir= "models/nllb/nllb_output/logs",
    logging_steps = 1,
    save_strategy = 'epoch',
    save_steps = 2,
    save_total_limit = 3,
    seed = 42,
    dataloader_drop_last = False,
    eval_steps = 1,
    # label_smoothing_factor: float = 0.0,
    optim = 'adafactor',
    # resume_from_checkpoint: Optional[str] = None,
    # fp16_backend: str = 'auto',
    # batch_eval_metrics: bool = False,
    # eval_on_start=True,
    # generation_max_length= 50,
    generation_num_beams=2,
    generation_config = "models/nllb/nllb_output/base_model/generation_config.json",
    run_name="Test3"
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=concat_ds,
    eval_dataset=tokenized_zds["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Bleu,Gen Len
0,0.2497,0.694405,0.1171,11.4052
1,0.2541,0.699035,0.1171,11.567
2,0.259,0.702424,0.1184,11.7661
3,0.2509,0.708485,0.1187,11.6757
5,0.2455,0.712764,0.1165,11.4752
6,0.2345,0.714981,0.1149,11.2339
7,0.2364,0.717659,0.1202,11.4201
8,0.2399,0.715488,0.1179,11.5105
10,0.2334,0.724775,0.1202,11.6451
11,0.2277,0.722733,0.1211,11.5697


{'bleu': 0.11711129409509477, 'precisions': [0.3869575361642557, 0.16603295310519645, 0.10192074753417546, 0.06276619592019726], 'brevity_penalty': 0.8225025962954745, 'length_ratio': 0.8365375231775154, 'translation_length': 8572, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11712021186375826, 'precisions': [0.3851266185401627, 0.16400220507166482, 0.09959555106167846, 0.0600346770697876], 'brevity_penalty': 0.8401522869864303, 'length_ratio': 0.8516639016297453, 'translation_length': 8727, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11835269389043589, 'precisions': [0.380515117581187, 0.16128167314653438, 0.09763265306122448, 0.05906908787309539], 'brevity_penalty': 0.8628793478853494, 'length_ratio': 0.8714745779252464, 'translation_length': 8930, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11867357851812017, 'precisions': [0.38450259418001353, 0.16375929682217716, 0.0984447385837194, 0.0596633283613893], 'brevity_penalty': 0.8557614759620363, 'length_ratio': 0.8652288474675515, 'translation_length': 8866, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11621035359545959, 'precisions': [0.38767695521002554, 0.16510423954106618, 0.1006872852233677, 0.06027580071174377], 'brevity_penalty': 0.8277674883789404, 'length_ratio': 0.8410266419439836, 'translation_length': 8618, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11645026253686239, 'precisions': [0.3902439024390244, 0.16562802935881457, 0.09884117246080436, 0.05887560867640549], 'brevity_penalty': 0.8361893987250133, 'length_ratio': 0.8482482677856934, 'translation_length': 8692, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11493819487601392, 'precisions': [0.390465924062762, 0.170689153823786, 0.1038037682189833, 0.06262857142857142], 'brevity_penalty': 0.7966557588134452, 'length_ratio': 0.8147750561139846, 'translation_length': 8349, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12020631807814525, 'precisions': [0.39134448932487015, 0.1690297470113984, 0.1031596925704526, 0.06350962602345651], 'brevity_penalty': 0.8331233472552837, 'length_ratio': 0.8456133502488533, 'translation_length': 8665, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11789233488700968, 'precisions': [0.3861488822309288, 0.16636603302344943, 0.10147382686769439, 0.06107367963486199], 'brevity_penalty': 0.8346005697879634, 'length_ratio': 0.8468820142480726, 'translation_length': 8678, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11501374945521445, 'precisions': [0.38954954954954957, 0.17172454041435659, 0.10453648915187377, 0.06300860265054639], 'brevity_penalty': 0.7938419675390699, 'length_ratio': 0.812432907192349, 'translation_length': 8325, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12022866612755323, 'precisions': [0.3853801830301661, 0.16571815718157182, 0.1004950495049505, 0.06118143459915612], 'brevity_penalty': 0.854086936438421, 'length_ratio': 0.8637650043915293, 'translation_length': 8851, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1211460982631057, 'precisions': [0.393781207444394, 0.16796076828769924, 0.10140797854508883, 0.06160208968219417], 'brevity_penalty': 0.849721934476216, 'length_ratio': 0.8599590123938714, 'translation_length': 8812, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12123538115178274, 'precisions': [0.38595909342178, 0.1622656456297861, 0.0980297157622739, 0.05987525987525988], 'brevity_penalty': 0.8755604354002521, 'length_ratio': 0.882697374841417, 'translation_length': 9045, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12241053048196006, 'precisions': [0.38355565274436915, 0.16130293159609121, 0.0984884645982498, 0.059640522875817], 'brevity_penalty': 0.8865830328247704, 'length_ratio': 0.8925539182199668, 'translation_length': 9146, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1213092096479343, 'precisions': [0.3833023306707517, 0.16066770996348462, 0.09626687847498014, 0.05932547744819179], 'brevity_penalty': 0.885822534984257, 'length_ratio': 0.8918707914511564, 'translation_length': 9139, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12222960957600523, 'precisions': [0.38570315109675984, 0.16511318242343542, 0.10025957170668397, 0.061436280614362807], 'brevity_penalty': 0.8685203539035803, 'length_ratio': 0.876451644383722, 'translation_length': 8981, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11941900184692854, 'precisions': [0.3889516493447808, 0.16501829020457934, 0.09893934371892608, 0.06015358361774744], 'brevity_penalty': 0.8541986468031265, 'length_ratio': 0.8638625939299307, 'translation_length': 8852, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12156522468201407, 'precisions': [0.38995108937305467, 0.16438538205980066, 0.09960873818063254, 0.059654300168634065], 'brevity_penalty': 0.8701742454622539, 'length_ratio': 0.8779154874597443, 'translation_length': 8996, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11639391073228435, 'precisions': [0.3961008083689967, 0.17086875090044662, 0.10368663594470046, 0.06258628624022089], 'brevity_penalty': 0.8040122679239887, 'length_ratio': 0.820923197033278, 'translation_length': 8412, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12201944281736665, 'precisions': [0.38830022075055187, 0.1628673079457109, 0.09864384888601872, 0.06001250260470931], 'brevity_penalty': 0.8772042192542276, 'length_ratio': 0.8841612179174393, 'translation_length': 9060, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12043263418014323, 'precisions': [0.38902828467153283, 0.16609565574893792, 0.10230136065849152, 0.06248643957474506], 'brevity_penalty': 0.8447778637312635, 'length_ratio': 0.8556650727042061, 'translation_length': 8768, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12118030038930838, 'precisions': [0.38717523493642897, 0.1622656456297861, 0.09853012437409142, 0.059276206322795344], 'brevity_penalty': 0.8755604354002521, 'length_ratio': 0.882697374841417, 'translation_length': 9045, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11538910383259839, 'precisions': [0.38766626360338574, 0.1707604059420503, 0.10640216411181244, 0.06548590072244233], 'brevity_penalty': 0.7873701333426283, 'length_ratio': 0.8070654825802674, 'translation_length': 8270, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11725258860790261, 'precisions': [0.3900709219858156, 0.169409071397911, 0.1045326774420239, 0.06369426751592357], 'brevity_penalty': 0.8095883952995467, 'length_ratio': 0.8256074948765493, 'translation_length': 8460, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12078141082364427, 'precisions': [0.38402320651567556, 0.16243993593166045, 0.09972253957891301, 0.06067604450976275], 'brevity_penalty': 0.8665325493361479, 'length_ratio': 0.8746950326924954, 'translation_length': 8963, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12250318614132011, 'precisions': [0.38386847622200154, 0.1620126448893572, 0.09998389953308646, 0.061076604554865424], 'brevity_penalty': 0.8775326923656148, 'length_ratio': 0.8844539865326437, 'translation_length': 9063, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11959262671909926, 'precisions': [0.38904299583911234, 0.16724690154574573, 0.10368978476255551, 0.06338339222614842], 'brevity_penalty': 0.8316443119831952, 'length_ratio': 0.844344686249634, 'translation_length': 8652, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12101359314026437, 'precisions': [0.3897968713518562, 0.16983791402396053, 0.10690846286701208, 0.0664288898796255], 'brevity_penalty': 0.821814192550625, 'length_ratio': 0.8359519859471065, 'translation_length': 8566, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11767908908243897, 'precisions': [0.38617374384800274, 0.1635012386457473, 0.10016891891891892, 0.060533216783216784], 'brevity_penalty': 0.8412821331023466, 'length_ratio': 0.8526397970137601, 'translation_length': 8737, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11669048672074253, 'precisions': [0.39006329869819656, 0.16966096783541001, 0.10554371002132196, 0.06498277841561424], 'brevity_penalty': 0.7994633073142109, 'length_ratio': 0.8171172050356201, 'translation_length': 8373, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11801541012187038, 'precisions': [0.39019725044829645, 0.17188859878154916, 0.10716833095577746, 0.06637372802960222], 'brevity_penalty': 0.7985281512486925, 'length_ratio': 0.8163364887284084, 'translation_length': 8365, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11674636881142783, 'precisions': [0.3928092626447288, 0.1744876744876745, 0.10926669098869025, 0.06712361143937604], 'brevity_penalty': 0.7796792680772735, 'length_ratio': 0.800722162584171, 'translation_length': 8205, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11896928208872795, 'precisions': [0.3949219215639528, 0.17259323503902863, 0.10768956924172303, 0.06618838231890765], 'brevity_penalty': 0.8013315397936626, 'length_ratio': 0.8186786376500439, 'translation_length': 8389, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12051660848215867, 'precisions': [0.3891032917139614, 0.16500885679247854, 0.1015389762462362, 0.062134661182074044], 'brevity_penalty': 0.8494976517418725, 'length_ratio': 0.8597638333170684, 'translation_length': 8810, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11763188418095266, 'precisions': [0.38675917215428035, 0.16721171619508032, 0.10464913808114226, 0.06422636424882101], 'brevity_penalty': 0.8146779716379029, 'length_ratio': 0.8299014345662145, 'translation_length': 8504, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12036879311190843, 'precisions': [0.38754090960388216, 0.16400541271989175, 0.10071345611415297, 0.061307609860664525], 'brevity_penalty': 0.8552035617801065, 'length_ratio': 0.864740899775544, 'translation_length': 8861, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12164443514940208, 'precisions': [0.3843834141087776, 0.1577937036089071, 0.09520836584985172, 0.057388522295540895], 'brevity_penalty': 0.9015786741779402, 'length_ratio': 0.906118864057773, 'translation_length': 9285, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11874344443408902, 'precisions': [0.3859187178019462, 0.16492290748898678, 0.10105935765932403, 0.061771058315334776], 'brevity_penalty': 0.8410562494288131, 'length_ratio': 0.8524446179369571, 'translation_length': 8735, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11714117882053116, 'precisions': [0.38564809314482595, 0.16757846242441693, 0.10527246992215145, 0.0660398991057097], 'brevity_penalty': 0.8045942772800627, 'length_ratio': 0.8214111447252854, 'translation_length': 8417, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11970865615070135, 'precisions': [0.3912129002103295, 0.16918301114717088, 0.10512642881884308, 0.06499327655759748], 'brevity_penalty': 0.8208957178307029, 'length_ratio': 0.8351712696398946, 'translation_length': 8558, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12388995206553237, 'precisions': [0.38931633326005716, 0.1625803068047725, 0.10017716218392655, 0.06157378417866834], 'brevity_penalty': 0.881357899998883, 'length_ratio': 0.8878696203766956, 'translation_length': 9098, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12142253792191339, 'precisions': [0.39231885676007594, 0.16283729628640128, 0.09990141307919816, 0.06059312993385961], 'brevity_penalty': 0.8658691873060747, 'length_ratio': 0.8741094954620865, 'translation_length': 8957, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1184261157948861, 'precisions': [0.38590680676107814, 0.1633493479752917, 0.10063844086021505, 0.061268672872916215], 'brevity_penalty': 0.8434258955743296, 'length_ratio': 0.8544939982433883, 'translation_length': 8756, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11902594035191454, 'precisions': [0.38768582465176166, 0.167420814479638, 0.10539088230195874, 0.0651589789520824], 'brevity_penalty': 0.819171719435908, 'length_ratio': 0.8337074265638723, 'translation_length': 8543, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11738219050167567, 'precisions': [0.38794705062703205, 0.16482285394202492, 0.10226882090065315, 0.06204136090727151], 'brevity_penalty': 0.8270820538641993, 'length_ratio': 0.8404411047135747, 'translation_length': 8612, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12116579624870609, 'precisions': [0.38948092127878997, 0.1657938257993385, 0.10413839891451832, 0.06433135051773518], 'brevity_penalty': 0.8401522869864303, 'length_ratio': 0.8516639016297453, 'translation_length': 8727, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12132341669576559, 'precisions': [0.39127366811713515, 0.17007963594994313, 0.1086501569584932, 0.06806400721208024], 'brevity_penalty': 0.8145625315875773, 'length_ratio': 0.829803845027813, 'translation_length': 8503, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1179356297419643, 'precisions': [0.3899595527004521, 0.16914203316510454, 0.1065036328194223, 0.0661308840413318], 'brevity_penalty': 0.8033134995818489, 'length_ratio': 0.8203376598028691, 'translation_length': 8406, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12202873582787761, 'precisions': [0.38030006523157206, 0.15827617445321598, 0.09783809373520594, 0.05941794664510913], 'brevity_penalty': 0.8922164384307543, 'length_ratio': 0.8976285742168439, 'translation_length': 9198, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12165697802009884, 'precisions': [0.38768489852081184, 0.16537931034482758, 0.10452312383533796, 0.06581834137779728], 'brevity_penalty': 0.8394738659076754, 'length_ratio': 0.8510783643993364, 'translation_length': 8721, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12210339323489289, 'precisions': [0.3900551190336578, 0.1707766439909297, 0.10853658536585366, 0.06889541450192004], 'brevity_penalty': 0.8173301149381055, 'length_ratio': 0.8321459939494487, 'translation_length': 8527, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12022375855849747, 'precisions': [0.3878171401847305, 0.16958486303304152, 0.10540867461551753, 0.06654796349877587], 'brevity_penalty': 0.8203213210593847, 'length_ratio': 0.8346833219478872, 'translation_length': 8553, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11527258897967484, 'precisions': [0.3846246681148926, 0.17006603081438004, 0.1065397053539346, 0.06529289187311878], 'brevity_penalty': 0.7892562339846001, 'length_ratio': 0.8086269151946911, 'translation_length': 8286, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11780022554993393, 'precisions': [0.39075530140576603, 0.1698685540950455, 0.10620567375886525, 0.06605504587155964], 'brevity_penalty': 0.8019147939225872, 'length_ratio': 0.8191665853420513, 'translation_length': 8394, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12033960559450518, 'precisions': [0.380623973727422, 0.1589248434237996, 0.09632224168126094, 0.05857142857142857], 'brevity_penalty': 0.8853877349037641, 'length_ratio': 0.8914804332975506, 'translation_length': 9135, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12048761690978936, 'precisions': [0.3822139166758272, 0.16076580120639916, 0.09741296710316193, 0.05837926107368851], 'brevity_penalty': 0.8812487866146104, 'length_ratio': 0.8877720308382941, 'translation_length': 9097, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11944153746310103, 'precisions': [0.3925301204819277, 0.17323180553521744, 0.10975170924793091, 0.0696969696969697], 'brevity_penalty': 0.7909042931780006, 'length_ratio': 0.8099931687323119, 'translation_length': 8300, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12083796590220613, 'precisions': [0.37978323379783235, 0.1606128648791441, 0.09837646680597975, 0.06055008210180624], 'brevity_penalty': 0.8752313948463067, 'length_ratio': 0.8824046062262125, 'translation_length': 9042, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12219457118684075, 'precisions': [0.39118393722594047, 0.16775538568450313, 0.10537119397878891, 0.06688888888888889], 'brevity_penalty': 0.833237044102556, 'length_ratio': 0.8457109397872548, 'translation_length': 8666, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11680627866580512, 'precisions': [0.3875399739429113, 0.16695352839931152, 0.10411537108688006, 0.06495571201453554], 'brevity_penalty': 0.807616366261078, 'length_ratio': 0.8239484727237241, 'translation_length': 8443, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11891963992321684, 'precisions': [0.3931748933577087, 0.17508167508167508, 0.11151670012776053, 0.07049917198959073], 'brevity_penalty': 0.7796792680772735, 'length_ratio': 0.800722162584171, 'translation_length': 8205, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11927418218337504, 'precisions': [0.3873190902825637, 0.16627505183137525, 0.10224776068953861, 0.06239130434782609], 'brevity_penalty': 0.8377761277848439, 'length_ratio': 0.8496145213233142, 'translation_length': 8706, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12211836366569179, 'precisions': [0.3901174844505874, 0.16946331992788796, 0.10563020921925498, 0.0654945054945055], 'brevity_penalty': 0.8350547352617792, 'length_ratio': 0.8472723724016785, 'translation_length': 8682, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12277550736269136, 'precisions': [0.38812941444520394, 0.1665526207745997, 0.10488336969290149, 0.06545296922410056], 'brevity_penalty': 0.8459033294741843, 'length_ratio': 0.856640968088221, 'translation_length': 8778, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11984991196787205, 'precisions': [0.38320330426801286, 0.16521739130434782, 0.10313447927199192, 0.06379757785467129], 'brevity_penalty': 0.8389082207728882, 'length_ratio': 0.8505904167073289, 'translation_length': 8716, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12295592029194351, 'precisions': [0.37666380420781453, 0.15768005098789037, 0.09707385044124478, 0.059124950729207724], 'brevity_penalty': 0.9048956962598861, 'length_ratio': 0.909144139748219, 'translation_length': 9316, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12193381638369097, 'precisions': [0.38525683789192794, 0.16177057025122957, 0.10074869791666667, 0.061464233270400674], 'brevity_penalty': 0.8699538637127764, 'length_ratio': 0.8777203083829413, 'translation_length': 8994, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12216368642957541, 'precisions': [0.38037940379403795, 0.15849883930874387, 0.09738829452485841, 0.05908449284129865], 'brevity_penalty': 0.8951303687932044, 'length_ratio': 0.900263491753684, 'translation_length': 9225, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12203973011282104, 'precisions': [0.38890830711872304, 0.16957255343082114, 0.1078194970719945, 0.067747667703243], 'brevity_penalty': 0.8237636632435801, 'length_ratio': 0.8376110080999317, 'translation_length': 8583, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12019561662881598, 'precisions': [0.39037183820958155, 0.16980866629150254, 0.10546334716459198, 0.06497871386959445], 'brevity_penalty': 0.8233052441383305, 'length_ratio': 0.8372206499463257, 'translation_length': 8579, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.127470850093765, 'precisions': [0.38994535519125684, 0.16525589269436125, 0.10274409700063816, 0.06441654661452974], 'brevity_penalty': 0.8870173732094067, 'length_ratio': 0.8929442763735728, 'translation_length': 9150, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12569305063608194, 'precisions': [0.3829506756014501, 0.16430817610062892, 0.10231444533120511, 0.0640947132067769], 'brevity_penalty': 0.8819033097639368, 'length_ratio': 0.888357568068703, 'translation_length': 9103, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12103010904218549, 'precisions': [0.3839518783339008, 0.16580381471389646, 0.10277731581573257, 0.06294004693834009], 'brevity_penalty': 0.8496097984356122, 'length_ratio': 0.8598614228554698, 'translation_length': 8811, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12405153540137381, 'precisions': [0.3827893175074184, 0.16360776088096488, 0.1010230179028133, 0.0620012277470841], 'brevity_penalty': 0.8814670029051701, 'length_ratio': 0.8879672099150971, 'translation_length': 9099, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11772016725284387, 'precisions': [0.38911242603550295, 0.1689353775612552, 0.10482879719051799, 0.06524694154961486], 'brevity_penalty': 0.8084287565598959, 'length_ratio': 0.8246315994925344, 'translation_length': 8450, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12041336443748354, 'precisions': [0.38654761904761903, 0.17159763313609466, 0.1093224836370069, 0.06986301369863014], 'brevity_penalty': 0.8026143416016254, 'length_ratio': 0.8197521225724602, 'translation_length': 8400, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11896766207567198, 'precisions': [0.38315280018437425, 0.16567226307756347, 0.10294615645106671, 0.06317846287451151], 'brevity_penalty': 0.8346005697879634, 'length_ratio': 0.8468820142480726, 'translation_length': 8678, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11523461794229725, 'precisions': [0.38733774185647807, 0.17266616878267363, 0.108330288637194, 0.06745005875440659], 'brevity_penalty': 0.7750427192294073, 'length_ratio': 0.7969161705865131, 'translation_length': 8166, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12492696620026558, 'precisions': [0.3886729594669628, 0.16551632598885055, 0.1038961038961039, 0.06326999373564418], 'brevity_penalty': 0.871165441454367, 'length_ratio': 0.8787937933053577, 'translation_length': 9005, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12950977048154352, 'precisions': [0.3864951768488746, 0.16554268990965773, 0.10297305667389285, 0.06326611308817714], 'brevity_penalty': 0.9063904533435665, 'length_ratio': 0.9105103932858397, 'translation_length': 9330, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12454516164277749, 'precisions': [0.3828047298043983, 0.16442333069411455, 0.10276616275329688, 0.06316872427983539], 'brevity_penalty': 0.8759990089584934, 'length_ratio': 0.883087732995023, 'translation_length': 9049, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1245714212587496, 'precisions': [0.39018850783556663, 0.1694614860259032, 0.10610878661087866, 0.06604590731918579], 'brevity_penalty': 0.8490489584247445, 'length_ratio': 0.8593734751634625, 'translation_length': 8806, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12576617734001633, 'precisions': [0.38318979614084814, 0.16424305375227213, 0.10188261351052048, 0.06232294617563739], 'brevity_penalty': 0.8895115894101571, 'length_ratio': 0.8951888357568069, 'translation_length': 9173, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11690575770007212, 'precisions': [0.38381044022131344, 0.1705392371766769, 0.10799213302342213, 0.06697353279631761], 'brevity_penalty': 0.7925502259825656, 'length_ratio': 0.8113594222699326, 'translation_length': 8314, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11979349111263884, 'precisions': [0.3758927590374684, 0.15858453473132372, 0.0974637103206253, 0.05865580448065173], 'brevity_penalty': 0.8816851772864813, 'length_ratio': 0.8881623889919, 'translation_length': 9101, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12738937551702847, 'precisions': [0.3790443539747161, 0.16266056212641486, 0.10100541376643465, 0.06253698954428881], 'brevity_penalty': 0.9068171554790965, 'length_ratio': 0.9109007514394457, 'translation_length': 9334, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.11890064620459145, 'precisions': [0.3864623552123552, 0.17324336218277836, 0.1101878612716763, 0.06973467950223057], 'brevity_penalty': 0.7894918012197684, 'length_ratio': 0.8088220942714941, 'translation_length': 8288, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.12374910080054202, 'precisions': [0.37474165125639075, 0.15993265993265993, 0.10015723270440252, 0.0618], 'brevity_penalty': 0.8916759894174714, 'length_ratio': 0.8971406265248365, 'translation_length': 9193, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1207480096252133, 'precisions': [0.3839897757639131, 0.16704035874439463, 0.10601374570446735, 0.06699201419698314], 'brevity_penalty': 0.826510562821342, 'length_ratio': 0.8399531570215673, 'translation_length': 8607, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1262568936426746, 'precisions': [0.38239173444713126, 0.16454700406450767, 0.10339852516832318, 0.06472892187177902], 'brevity_penalty': 0.881357899998883, 'length_ratio': 0.8878696203766956, 'translation_length': 9098, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


{'bleu': 0.1259724319826109, 'precisions': [0.3875719982277359, 0.16567420934233162, 0.10363872644574398, 0.06494325346784363], 'brevity_penalty': 0.8736946206868657, 'length_ratio': 0.8810383526885918, 'translation_length': 9028, 'reference_length': 10247}


Non-default generation parameters: {'max_length': 50}


KeyboardInterrupt: 

In [None]:
# trainer.save_model("/root/zindi/models/nllb/nllb_output/checkpoint-300")

Non-default generation parameters: {'max_length': 100}
