In [1]:
# utils
import pandas as pd
import os
import numpy as np
from nltk.tokenize import word_tokenize
from tqdm import tqdm
import ast
import re
from scipy.stats import ttest_rel

# evaluation 
from evaluate import load

In [2]:
# open files in generations directory
def open_files(directory):
    files = os.listdir(directory)
    files.sort()
    return files

In [3]:
models_generations = {
    'en': {},
    'it': {},
    'ru': {},
    'ge': {}
}

files = open_files('generations')
for file in files:
    model = re.sub(r"^fine-tuned-|-exp2-gen\d+.csv", "", file)
    gen = re.search(r"gen\d+", file).group()

    model_generations = pd.read_csv('generations/' + file)
    # convert string to list
    model_generations['actual'] = model_generations['actual'].apply(ast.literal_eval)
    
    # if containt -en-
    if '-en-' in file:
        if model not in models_generations['en']:
            models_generations['en'][model] = {}
        models_generations['en'][model][gen] = model_generations
        print(f"model: {model} - gen: {gen}, lang: en")
    elif '-it-' in file:
        if model not in models_generations['it']:
            models_generations['it'][model] = {}
        models_generations['it'][model][gen] = model_generations
        print(f"model: {model} - gen: {gen}, lang: it")
    elif '-ge-' in file:
        if model not in models_generations['ge']:
            models_generations['ge'][model] = {}
        models_generations['ge'][model][gen] = model_generations
        print(f"model: {model} - ge: {gen}, lang: ge")
    elif '-ru-' in file:
        if model not in models_generations['ru']:
            models_generations['ru'][model] = {}
        models_generations['ru'][model][gen] = model_generations
        print(f"model: {model} - gen: {gen}, lang: ru")

model: Llama-3.1-8B-Instruct-en - gen: gen0, lang: en
model: Llama-3.1-8B-Instruct-en - gen: gen1, lang: en
model: Llama-3.1-8B-Instruct-en - gen: gen2, lang: en
model: Llama-3.1-8B-Instruct-ge - ge: gen0, lang: ge
model: Llama-3.1-8B-Instruct-ge - ge: gen1, lang: ge
model: Llama-3.1-8B-Instruct-ge - ge: gen2, lang: ge
model: Llama-3.1-8B-Instruct-it - gen: gen0, lang: it
model: Llama-3.1-8B-Instruct-it - gen: gen1, lang: it
model: Llama-3.1-8B-Instruct-it - gen: gen2, lang: it
model: Llama-3.1-8B-Instruct-ru - gen: gen0, lang: ru
model: Llama-3.1-8B-Instruct-ru - gen: gen1, lang: ru
model: Llama-3.1-8B-Instruct-ru - gen: gen2, lang: ru
model: Mistral-Nemo-Instruct-2407-en - gen: gen0, lang: en
model: Mistral-Nemo-Instruct-2407-en - gen: gen1, lang: en
model: Mistral-Nemo-Instruct-2407-en - gen: gen2, lang: en
model: Mistral-Nemo-Instruct-2407-ge - ge: gen0, lang: ge
model: Mistral-Nemo-Instruct-2407-ge - ge: gen1, lang: ge
model: Mistral-Nemo-Instruct-2407-ge - ge: gen2, lang: ge
mode

## Valutazione automatica

### <a href="https://huggingface.co/spaces/evaluate-metric/bleu">Bleu</a>

In [4]:
bleu = load("bleu")

Using the latest cached version of the module from C:\Users\OliverioM\.cache\huggingface\modules\evaluate_modules\metrics\evaluate-metric--bleu\9e0985c1200e367cce45605ce0ecb5ede079894e0f24f54613fca08eeb8aff76 (last modified on Wed Sep  4 17:21:14 2024) since it couldn't be found locally at evaluate-metric--bleu, or remotely on the Hugging Face Hub.


In [5]:
bleu_scores = {'en': {}, 'it': {}, 'ge': {}, 'ru': {}}

for lang in models_generations:
    for model in models_generations[lang]:
        model_bleu_scores = []
        print(f'Language: {lang}, Model: {model}')
        
        for gen in sorted(models_generations[lang][model]):  # Ensure order (gen0, gen1, gen2)
            references = models_generations[lang][model][gen]['actual']
            predictions = models_generations[lang][model][gen]['prediction']

            gen_bleu_scores = []
            for i in tqdm(range(len(references)), desc="Calcolo punteggi BLEU"):
                results = bleu.compute(predictions=[predictions[i]], references=[references[i]])
                gen_bleu_scores.append(results["bleu"])

            model_bleu_scores.append(gen_bleu_scores)  # Store BLEU for each generation
        
        # Store the three BLEU scores instead of their mean
        bleu_scores[lang][model] = model_bleu_scores
        print(f'BLEU scores for {model} ({lang}): {model_bleu_scores}')
        print(f'Average: {np.mean(model_bleu_scores)}')
        print()

Language: en, Model: Llama-3.1-8B-Instruct-en


Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:02<00:00, 133.94it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 125.16it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 124.96it/s]


BLEU scores for Llama-3.1-8B-Instruct-en (en): [[1.0, 0.6389431042462724, 0.0, 0.5431501067716656, 0.16245284024690473, 0.6909485625039599, 0.45986108016994914, 0.4554383163361387, 1.0, 0.7748677442328399, 0.45444956236094664, 0.604349704366814, 1.0, 1.0, 0.8891397050194614, 1.0, 0.8857000285382948, 1.0, 0.7028316885452014, 0.9621954581957615, 0.6524729016168254, 0.6836460043639834, 0.2975928234249097, 1.0, 0.94149097734812, 0.537284965911771, 1.0, 0.5691068337963667, 0.5883105707205901, 0.4408231875586727, 0.8307018474412792, 0.43550229045828076, 0.8225681733052571, 0.7039070022305045, 0.789357525105527, 1.0, 0.6909485625039599, 0.5671276403520978, 1.0, 0.8266188889979827, 0.4839281708164425, 0.48633831680799433, 0.40096084846312413, 0.5891223348606782, 0.788200954197212, 0.5348765896724532, 0.7186082239261685, 1.0, 1.0, 0.5757575636202255, 1.0, 0.8768419153065673, 1.0, 0.317023313852343, 0.640751803175311, 0.7513763122938849, 1.0, 0.8907003299890247, 0.7653312225290797, 0.70168793912

Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 109.43it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 98.91it/s] 
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 116.23it/s]


BLEU scores for Mistral-Nemo-Instruct-2407-en (en): [[1.0, 0.6389431042462724, 0.0, 0.5671687320916967, 0.12064803452402914, 0.8870909062679921, 0.37283992459416565, 0.35406256950112003, 1.0, 0.7748677442328399, 0.4287232854924631, 0.7003652415618387, 1.0, 0.0, 0.8891397050194614, 1.0, 0.8857000285382948, 1.0, 0.0, 1.0, 0.5005335865721833, 0.6903822463805598, 0.7311104457090247, 0.8648454150926429, 0.605540583950642, 0.537284965911771, 1.0, 0.6680785126715956, 0.5165213935736707, 0.4408231875586727, 0.5814307369682193, 0.39242591746953154, 0.8080952687355902, 0.5233694611909245, 0.789357525105527, 0.6238986072117501, 0.7184162110235452, 0.9584146563694087, 0.7985065516266613, 0.6767045368190813, 0.37704209027116137, 0.48633831680799433, 0.2562956311149168, 0.5927478741786792, 0.5654397417534262, 0.5737014600396702, 0.7186082239261685, 0.9234732618882052, 1.0, 0.5757575636202255, 1.0, 0.8768419153065673, 1.0, 0.317023313852343, 0.640751803175311, 0.7361410881038141, 1.0, 0.7557847762798

Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:04<00:00, 88.56it/s] 
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 104.28it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 115.49it/s]


BLEU scores for Qwen2.5-7B-Instruct-en (en): [[1.0, 0.6389431042462724, 0.41722614486115056, 0.6308518147952611, 0.13139217875016068, 0.5527192911852292, 0.34756561191481233, 0.4781516879503063, 1.0, 0.7748677442328399, 0.6346301963183836, 0.5792157224731826, 1.0, 0.6606328636027614, 0.8891397050194614, 0.6756000774035172, 0.8761560783209453, 0.6223329772884784, 0.0, 0.9621954581957615, 0.5628970419870812, 0.7912455827969054, 0.6471892368478446, 0.8648454150926429, 0.7232241912229858, 1.0, 1.0, 0.6680785126715956, 0.4914498405430853, 0.5445178846139404, 1.0, 0.6038143863903804, 0.7786452373079144, 0.32169129762039694, 0.7825422900366437, 0.5307712171072443, 0.3508772012923553, 0.838662938917912, 0.8333521524725502, 0.5460241725418133, 0.5857575145749553, 0.659109604993135, 0.4317853842116786, 0.4442846042822245, 0.788200954197212, 0.6430990508114238, 0.7186082239261685, 1.0, 1.0, 0.5757575636202255, 1.0, 0.8521617416227857, 1.0, 0.32711318587705646, 0.6156695719210522, 0.58893462099765

Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 104.84it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 118.18it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 104.34it/s]


BLEU scores for Llama-3.1-8B-Instruct-it (it): [[1.0, 1.0, 0.0, 0.8274031647663784, 0.2812488198404138, 0.5472202896156649, 0.44903323241491255, 0.4165767636794606, 1.0, 0.7896895367562644, 0.4411263598481451, 0.0, 1.0, 0.7506238537503395, 1.0, 1.0, 1.0, 1.0, 0.7155696432986562, 1.0, 0.6451289333598902, 0.6226719241943228, 0.7007069665923001, 0.7348889200874658, 0.7203294535577252, 1.0, 1.0, 0.0, 0.8130527826633674, 0.24712442545253582, 1.0, 0.4544489097785626, 0.7977820063991994, 0.4659645478099519, 1.0, 1.0, 0.7941963785512203, 0.7839306339139199, 0.7848518349390632, 0.9069800139256777, 0.34332704063408953, 0.0, 0.28171718914025734, 0.47979644408651617, 0.80377750806414, 0.8570980059247856, 0.3508439695638686, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.5623413251903491, 0.42386852679520176, 0.2646015952359329, 1.0, 0.8907003299890247, 0.5853044306603972, 0.0, 0.43167001068522526, 0.6012663367693701, 0.829288386658654, 0.8761151623788772, 0.6730489965212471, 1.0, 0.7143598737694116, 0.5266403878

Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 97.89it/s] 
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 112.48it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 111.75it/s]


BLEU scores for Mistral-Nemo-Instruct-2407-it (it): [[1.0, 1.0, 0.4160751652217845, 0.5466167118536712, 0.23626187156344583, 0.6114193239581327, 0.5269003930556608, 0.6660963293607925, 1.0, 0.7778111223054219, 0.28433291815307693, 0.4186703741203227, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.6742708196670307, 0.7611606003349892, 0.829165132702379, 0.871083756385876, 0.3477250470582593, 1.0, 0.6868247058200974, 1.0, 1.0, 0.0, 0.5501156519279469, 0.24712442545253582, 1.0, 0.4814564802258215, 0.8012989896861095, 0.2941377838901496, 0.8265168183793802, 1.0, 0.8593887047640296, 0.7725715625854126, 1.0, 0.7848518349390632, 0.48745576571356625, 0.34389312176578424, 0.283173140409153, 0.333086849847418, 0.80377750806414, 0.7869981594416393, 1.0, 0.36720562698935927, 0.0, 0.7105477917053865, 1.0, 0.7067351086370333, 1.0, 0.5623413251903491, 0.7588809164123451, 0.5249110810825803, 1.0, 0.7337036588726153, 0.877582607148048, 0.0, 0.43167001068522526, 0.6962280713338013, 1.0, 0.7577395672414201, 0.673048996

Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 109.24it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 103.30it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 103.11it/s]


BLEU scores for Qwen2.5-7B-Instruct-it (it): [[0.2796842457966537, 0.4630777161991027, 0.0, 0.5252034293335157, 0.1531305080006172, 0.6399329493579472, 0.5744888426961038, 0.5509064257961308, 1.0, 0.7778111223054219, 0.3329562935398428, 0.0, 1.0, 0.7506238537503395, 1.0, 0.488923022434901, 0.5828233954152654, 1.0, 0.0, 1.0, 0.6123757859934763, 0.839587623092576, 0.7238348098810832, 0.6703420896351792, 0.5686658363061537, 0.7598356856515925, 0.8995407682327061, 0.33917013943875235, 0.5676351376297435, 0.24712442545253582, 0.6606328636027614, 0.4867643691804454, 0.7540040278779003, 0.301862665526534, 0.7635332920902254, 0.5156626918239822, 0.8862476419965991, 0.38376612417372785, 0.8137489370974955, 0.7094521095075527, 0.48745576571356625, 0.0, 0.3059630177442128, 0.4285640799742894, 1.0, 0.8250534283031511, 0.317023313852343, 1.0, 0.0, 0.7307717333430739, 1.0, 0.6337925566729508, 1.0, 0.40824829046386296, 0.5151727504710328, 0.41841993715998216, 1.0, 0.8907003299890247, 0.79347605685747

Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 116.96it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 117.72it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 112.95it/s]


BLEU scores for Llama-3.1-8B-Instruct-ru (ru): [[1.0, 1.0, 0.4111336169005197, 0.3538995029906511, 0.0, 0.3784157746763232, 0.24405051724608798, 0.0, 1.0, 0.6739047062564734, 0.33819598660582717, 0.8857000285382948, 1.0, 0.4347208719449915, 1.0, 1.0, 0.48549177170732344, 0.0, 0.0, 1.0, 0.5170678810621915, 0.5919783121240433, 0.0, 1.0, 0.0, 0.668740304976422, 1.0, 0.0, 0.2995015510125632, 0.0, 0.5969491792019646, 0.0, 0.6631503138799142, 0.1902239321133787, 0.4221068126374527, 0.0, 0.539020965801877, 0.0, 1.0, 0.7628510251001487, 0.3235946184239223, 0.4012671145009053, 0.2801342621283878, 0.1795782617347238, 0.6471892368478446, 0.6567939165466562, 0.4962644776757998, 1.0, 1.0, 0.5773502691896258, 1.0, 0.7641254083103469, 0.0, 0.2557665697125104, 0.42849655626964983, 0.33141202373036377, 1.0, 0.30277029197532096, 0.35345481163747966, 0.3155984539112945, 0.0, 0.4062892372820313, 0.2782087319667435, 0.2689054715066592, 0.5568544122775908, 0.0, 0.6476370820445139, 0.0, 0.3979182783756331, 0

Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 104.02it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 114.91it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 117.11it/s]


BLEU scores for Mistral-Nemo-Instruct-2407-ru (ru): [[0.4111336169005197, 1.0, 0.0, 0.6370555116759559, 0.23002725116422643, 0.3556359469499498, 0.0, 0.0, 1.0, 0.5702822264405542, 0.4111336169005197, 0.632708767532676, 1.0, 0.25965358893403384, 1.0, 0.0, 0.5969491792019646, 0.558394826472418, 0.0, 1.0, 0.4597848727002179, 0.5610310064032589, 0.3646285861936466, 0.6580370064762462, 0.0, 0.0, 1.0, 0.0, 0.2312735373463785, 0.27901593935858265, 0.5969491792019646, 0.5320157068147425, 0.46293095527520456, 0.4423879866494836, 0.4221068126374527, 0.0, 0.7407991337415321, 0.35986074277491664, 1.0, 0.7628510251001487, 0.3082627646062185, 0.4440750605884706, 0.3318942108582755, 0.3341305167420625, 0.744373319312051, 0.7159023375272214, 0.6905911470987942, 0.0, 0.6042750794713536, 0.6238986072117501, 1.0, 0.6531420255892322, 1.0, 0.6529942057256104, 0.0, 0.4568735785454562, 0.48235607976922595, 0.7294246829467232, 0.38366415258372866, 1.0, 0.0, 0.2797330485027647, 0.24375427155462506, 0.268905471

Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 116.42it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 114.93it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 119.93it/s]


BLEU scores for Qwen2.5-7B-Instruct-ru (ru): [[0.7598356856515925, 1.0, 0.0, 0.3082331946100712, 0.0, 0.6797016272640598, 0.2534743707366162, 0.0, 1.0, 0.6026080978557137, 0.4111336169005197, 0.5595642461260143, 1.0, 0.0, 1.0, 0.6042750794713536, 0.0, 0.0, 0.0, 1.0, 0.46792593632038904, 0.4558459331006228, 0.7016035864257111, 0.6580370064762462, 0.4029883231412915, 0.0, 1.0, 0.16923267918690046, 0.435239942567339, 0.0, 0.5969491792019646, 0.30223684058200134, 0.7383682348551674, 0.5691176092710594, 0.7285959997974691, 0.0, 0.37399962732073283, 0.8388870904650976, 0.39328368415488135, 0.5982749147637114, 0.31437754726042405, 0.4440750605884706, 0.3096654005488453, 0.46254778821493464, 0.5502151926234128, 0.48773413452296055, 0.6905911470987942, 0.0, 0.6042750794713536, 0.6238986072117501, 1.0, 0.3814636646398287, 1.0, 0.6529942057256104, 0.38080073483863147, 0.4314159820008676, 1.0, 0.0, 0.15806979715915764, 0.5578002860768766, 0.0, 0.5356213475308882, 0.24264382743890414, 0.60662182296

Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 100.97it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 115.27it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 109.25it/s]


BLEU scores for Llama-3.1-8B-Instruct-ge (ge): [[1.0, 0.8948393168143697, 0.0, 0.4562367262019882, 0.0, 0.6348073328850725, 0.46330597318302275, 0.5275980999336181, 0.0, 1.0, 0.4016650474304947, 0.9071112036324318, 0.7016879391277371, 1.0, 0.7476743906106103, 0.4548019047027907, 0.5757575636202255, 1.0, 0.6341922683775969, 0.7476743906106103, 0.7192367996522091, 0.7997605032399495, 0.5312583871630396, 0.541822042580596, 0.7487402156832422, 1.0, 1.0, 0.0, 0.5192108761155978, 0.0, 0.3549481056010053, 0.6623645504445912, 0.8219655850102567, 0.5373698475633983, 0.4527471870952893, 1.0, 0.43317377303208376, 0.8743414417652072, 1.0, 0.8235296495684443, 0.2551219280636033, 0.669961753544709, 0.196046355324564, 0.0, 0.6803749333171202, 0.6382401024752941, 0.7825422900366437, 0.8265168183793802, 1.0, 0.5747078645171895, 1.0, 0.8696398662122882, 1.0, 0.0, 0.5038601987543927, 0.5154337627145953, 0.8931539818068694, 0.7362096820824198, 0.6267671821810656, 0.7016879391277371, 0.0, 0.700961800242092

Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 106.21it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 114.19it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 110.85it/s]


BLEU scores for Mistral-Nemo-Instruct-2407-ge (ge): [[1.0, 0.8948393168143697, 0.0, 0.4562367262019882, 0.0, 0.4873483706735273, 0.44130156394078607, 0.5030760719034105, 0.0, 1.0, 0.3672654922979082, 0.6811873227441255, 0.537284965911771, 0.0, 0.7476743906106103, 0.4548019047027907, 0.5757575636202255, 0.5623413251903491, 0.6598573198708221, 1.0, 0.5935409122738204, 0.6998534451614725, 0.37239098949398236, 0.5773502691896258, 0.45180100180492244, 0.537284965911771, 1.0, 0.2451829991731284, 0.47238380914874, 0.4408231875586727, 0.3549481056010053, 0.5333791967180695, 0.7663316581060059, 0.4298002582667181, 0.6781836812426998, 0.5558391766749284, 0.5224385466734036, 0.6729864884660303, 1.0, 0.8787419089273848, 0.2618449527244832, 0.48633831680799433, 0.2323526860207039, 0.31361832916936394, 0.7098891248983344, 0.3739514690696618, 0.7506238537503395, 0.4379518644116554, 1.0, 0.9554427922043668, 1.0, 0.3855924429638652, 1.0, 0.0, 0.5073659863347824, 0.7826164119688067, 1.0, 0.7890139526519

Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 120.62it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 125.07it/s]
Calcolo punteggi BLEU: 100%|██████████| 383/383 [00:03<00:00, 111.34it/s]

BLEU scores for Qwen2.5-7B-Instruct-ge (ge): [[1.0, 1.0, 0.0, 0.40737303500287686, 0.0, 0.5451087893190223, 0.4596980088392874, 0.24026767328685925, 0.0, 0.564926870711699, 0.33725973214458643, 0.7075494147561419, 0.8132882808488929, 0.0, 0.5156626918239822, 0.0, 0.46563348805256366, 0.7941386679207173, 0.36191740494054153, 1.0, 0.5037744498395924, 0.6120737901860179, 0.4354294657746997, 0.0, 0.21186050864016664, 1.0, 1.0, 0.23636142259818999, 0.4438233377140732, 0.4408231875586727, 0.47750342648354643, 0.7575151896815032, 0.7490325070687434, 0.321774771113985, 0.6527196653219786, 0.3549481056010052, 0.5096448601550383, 0.5248780147760078, 1.0, 0.9291564424807219, 0.3219583496901924, 0.54413074047686, 0.26567149115019906, 0.14719696960805126, 0.6803749333171202, 0.33266046761138807, 0.7506238537503395, 0.39458812555917666, 1.0, 0.4487432014166947, 0.0, 0.4431197491221154, 1.0, 0.0, 0.517865601584045, 0.41583634222861793, 1.0, 0.6757594721831311, 0.5036586029926082, 0.7016879391277371, 




In [7]:
import pickle

# Save BLEU scores
with open('bleu_scores-exp2.pkl', 'wb') as f:
    pickle.dump(bleu_scores, f)