In [1]:
!pip install rouge-score
!pip install bert-score
!pip install jiwer

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=41aa08859cc40189f596ac9f6d0a1a22a698fd9f8fa6a93e2a1aa3945c8cab3d
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2
Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m876.9 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: bert-score
Successfully installed bert-score-0.3.13
Collecting jiwer
  Downloading jiwer-3.0.3-py3-none-any.whl (21 kB)
Collecting rapidfuzz<4,>=3

In [2]:
!pip install nltk



In [3]:
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import single_meteor_score
from rouge_score import rouge_scorer
# from baseline_lpp.baseline import run_baseline
# from utils.paths import *
from jiwer import wer
from bert_score import BERTScorer
import socket
from urllib3.connection import HTTPConnection
import nltk

In [4]:
nltk.download('wordnet')


[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [5]:


"""
BERTScore (https://arxiv.org/abs/1904.09675)
"""
class BERTSCORE(object):
    """
    copied from https://github.com/HuthLab/semantic-decoding/blob/main/decoding/utils_eval.py
    """
    def __init__(self, idf_sents=None, rescale = True, score = "f"):
        self.metric = BERTScorer(lang = "en", rescale_with_baseline = rescale, idf = (idf_sents is not None), idf_sents = idf_sents)
        if score == "precision": self.score_id = 0
        elif score == "recall": self.score_id = 1
        else: self.score_id = 2

    def score(self, ref, pred):
        ref_strings = [" ".join(x) for x in ref]
        pred_strings = [" ".join(x) for x in pred]
        return self.metric.score(cands = pred_strings, refs = ref_strings,verbose=True)[self.score_id].numpy()


def create_metrics_df(ground_truth: list[str], predicted: list[str], include_bert_score = False) -> pd.DataFrame:
    """
    calculates metrics
    :param ground_truth:
    :param predicted:
    :return:
    """
    # set higher timeout mb to prevent connection timeout when downloading pytorch_model.bin
    HTTPConnection.default_socket_options = (
            HTTPConnection.default_socket_options + [
        (socket.SOL_SOCKET, socket.SO_SNDBUF, 2000000),
        (socket.SOL_SOCKET, socket.SO_RCVBUF, 2000000)
    ])

    # Initialize ROUGE scorer
    ROUGE_SCORER = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    if include_bert_score:
        BERT_SCORER = BERTSCORE()
    # Initialize variables to accumulate scores
    rouge1_scores = []
    rouge2_scores = []
    rougeL_scores = []
    bleu_scores = []
    meteor_scores = []
    wer_scores = []
    # bert_scores = []

    # Loop through each pair of ground truth and predicted sentences
    for gt, pred in zip(ground_truth, predicted):
        # Tokenize the sentences
        gt_tokens = gt.split()
        pred_tokens = pred.split()

        # Calculate ROUGE scores
        rouge_scores = ROUGE_SCORER.score(' '.join(gt_tokens), ' '.join(pred_tokens))
        rouge1_scores.append(rouge_scores['rouge1'].fmeasure)
        rouge2_scores.append(rouge_scores['rouge2'].fmeasure)
        rougeL_scores.append(rouge_scores['rougeL'].fmeasure)

        # Calculate BLEU score
        bleu_score = sentence_bleu(gt_tokens, pred_tokens)
        bleu_scores.append(bleu_score)

        # Calculate METEOR score
        meteor_score_value = single_meteor_score(gt_tokens, pred_tokens)
        meteor_scores.append(meteor_score_value)

        # Calculate word error rate (WER)
        wer_score = wer(reference=gt,hypothesis=pred)
        wer_scores.append(wer_score)

    # TODO: add BERTScore
    # add bert score if specified
    if include_bert_score:
        bert_scores = BERT_SCORER.score(ref=ground_truth, pred=predicted)
        # bert_scores.append(bs)

    df_scores = pd.DataFrame(
        {
            'gt': ground_truth,
            'pred': predicted,
            'rouge1': rouge1_scores,
            'rouge2': rouge2_scores,
            'rougeL': rougeL_scores,
            'bleu': bleu_scores,
            'meteor': meteor_scores,
            'wer':wer_scores,
            'bert_score': bert_scores

        }
    )
    return df_scores


# def save_baseline_metrics(include_bert_score:bool=False):
#     preproc_sentences_base, gen_sentences_base = run_baseline()
#     df_metrics_baseline = create_metrics_df(ground_truth=preproc_sentences_base, predicted=gen_sentences_base, include_bert_score=include_bert_score)
#     filename = 'baseline_metrics.csv'
#     save_path = os.path.join(eval_path, 'metrics')
#     file_path = os.path.join(save_path, filename)
#     if not os.path.exists(save_path):
#         os.makedirs(save_path)
#     df_metrics_baseline.to_csv(file_path, index=False)
#     print(f'saved {file_path}')

# if __name__ == "__main__":
#     save_baseline_metrics(include_bert_score =True)
#     print('done')


# load baseline pred


In [6]:
def load_baseline_pred():
  preproc_sentences = ['good morning said the little prince', 'good morning said the merchant', 'merchant this was a merchant', 'who sold pills that had been created', 'to quench thirst you could take', 'take one pill a week and you no longer', 'longer felt the need to drink anything', 'why are you selling those', 'asked the little prince it', 's a big time saver said the merchant', 'merchant experts have done calculations', 'calculations you save fifty three minutes', 'minutes per week and what', 'i do with the fifty three minutes', 'you can do anything you like with them', 'myself', 'the little prince said to himself', 'if i had fifty three minutes to spend', 'as i liked i d walk very', 'very slowly toward a spring of fresh water', 'water we were at', 'the eighth day since my accident in', 'the desert and i d listened to the story', 'story of the merchant as i drank the last', 'last drop of my water supply', 'ah i said to', 'the little prince these memories', 'memories of yours are very charming but', 'i haven t managed to repair my plane yet', 'yet i have nothing left to drink', 'and i too would be happy if', 'if i could walk slowly towards a spring', 'spring of fresh water', 'my friend the fox he said to', 'to me my dear fellow', 'our situation has nothing to do', 'with the fox anymore why not', 'not because we will die of', 'thirst he didn t follow', 'follow my reasoning and he answered me', 'it s nice to have had a friend', 'friend even if you re about to die', 'myself i m glad', 'to have had a fox as a friend', 'he never considers the danger', 'danger i said to myself he s never', 'never been hungry or thirsty a little', 'little sunshine is all he needs', 'but he looked at me and replied to', 'my thought i m also thirsty', 'thirsty let s look for a well', 'i made a gesture of weariness', 'weariness it s absurd to look', 'for a well at random in the immensity', 'immensity of the desert but we started', 'started walking anyway', 'when we had walked for hours', 'in silence night fell', 'and the stars began to come out', 'i saw them as if in a dream', 'as my thirst had made me feverish', 'the little prince s words', 'words danced in my memory', 'so you re also thirsty i asked', 'asked him but he didn t reply', 'to my question he said simply', 'simply water can be good for', 'the heart too i', 'didn t understand his answer but', 'i said nothing i knew better', 'better than to press my questions', 'he was tired he sat down', 'down i sat down beside him', 'him and after a silence', 'silence he spoke again', 'the stars are beautiful because of a', 'flower that can t be seen', 'i replied that s TRUE', 'TRUE and i looked without', 'without saying anything at the folds', 'folds of sand in the moonlight', 'the desert is beautiful', 'the little prince added', 'and it was TRUE i have always loved', 'loved the desert you sit', 'sit down on a sand dune you see nothing', 'nothing you hear nothing', 'and yet something radiates', 'radiates forth in the silence', 'what makes the desert', 'desert beautiful said the little', 'prince is that somewhere', 'it hides a well', 'i was surprised to suddenly', 'suddenly understand this mysterious radiation', 'radiation of the sands', 'when i was a little boy i', 'lived in an old house and', 'legend told that a treasure was buried', 'buried there of course no', 'one had ever been able to find it', 'or perhaps no one had even looked for', 'it but it cast an enchantment', 'enchantment over that house', 'my home was hiding a secret', 'secret in the depths of its heart', 'yes i said to the little', 'prince whether the house', 'house the stars or the desert', 'what gives them their beauty is something', 'something invisible i m', 'glad he said that you agree', 'with my fox', 'as the little prince fell asleep', 'asleep i took him in my arms and set', 'out walking again i felt', 'felt deeply moved it', 'it seemed to me that i was carrying a very', 'very fragile treasure it', 'it even seemed to me that there was nothing', 'nothing more fragile on earth', 'i looked in the moonlight', 'moonlight at his pale forehead his closed', 'closed eyes his locks of', 'hair that trembled in the wind', 'and i said to myself', 'what i see here is only a shell', 'shell that which is most important', 'important is invisible', 'as his slightly parted lips', 'lips gave way to a half smile i continued', 'continued what i find', 'so deeply moving about this little', 'sleeping prince is his devotion', 'devotion to a flower', 'it s the image of a rose that shines', 'shines in him like the flame of a lamp even', 'even when he s sleeping', 'and i came to think of him as even more', 'more fragile', 'one has to look after lamps', 'a gust of wind can put them out', 'and continuing to walk', 'walk i found the well at daybreak', 'daybreak men', 'men said the little prince stuff', 'stuff themselves into express trains', 'trains but they don t know what they re looking', 'for so they rush about and go', 'in circles and he added', 'added it s not worth it', 'the well we', 'had reached wasn t like the other wells', 'of the sahara the wells', 'of the sahara are mere holes', 'holes dug in the sand this one', 'one looked like a village well', 'but there was no village there', 'and i thought i was dreaming', 'it s strange i said to the little', 'prince everything s been prepared', 'prepared the pulley the bucket and the', 'rope he laughed took', 'the rope and put the pulley to work', 'work and the pulley moaned', 'moaned like an old weathervane when there has', 'has long been no wind', 'can you hear that said the little prince', 'prince we ve woken up the well', 'well and it s singing i didn', 'didn t want him to tire himself out', 'let me do it i said', 'it s too heavy for you', 'i hoisted the bucket slowly to the', 'the edge of the well and set it down good', 'good and level the song of the', 'pulley continued in my ears', 'ears and in the still trembling water', 'water i could see the sunlight shimmer', 'shimmer i m thirsty for this', 'this water said the little prince', 'give me some to drink', 'and i knew then what he d been looking', 'looking for i raised', 'the bucket to his lips he drank', 'drank his eyes closed', 'it was as sweet as some special', 'special festival treat', 'this water was something very different', 'different from ordinary nourishment', 'it was born of the walk under the stars', 'stars of the song of the pulley', 'pulley of the effort of my arms', 'it was good for the heart like', 'like a present when', 'i was a little boy the lights', 'of the christmas tree the music of the', 'the midnight mass the tenderness', 'tenderness in the smiles produced in a similar', 'similar way the radiance of the', 'gift that i received', 'the men where you live', 'said the little prince grow five', 'five thousand roses in a single garden', 'garden and they don t find what', 'they re looking for in it they', 'don t find it i replied', 'and yet what they re looking for', 'for could be found in a single', 'rose or in a little water', 'that s TRUE i said', 'and the little prince added', 'added but the eyes are blind', 'blind you have to search with the heart', 'i had drunk the', 'the water i breathed easily', 'easily the sand at sunrise', 'sunrise is the color of honey this', 'this honey color was also making', 'making me feel good why', 'why then did i have to have this sense', 'of grief you', 'have to keep your promise said', 'the little prince softly who had', 'again sat down beside me', 'what promise', 'you know a muzzle for my sheep', 'sheep i m responsible', 'for this flower', 'i took my sketches out of my pocket', 'pocket the little prince saw them', 'and laughed as he said your baobabs', 'baobabs they look a bit', 'like cabbages oh', 'and i d been so proud of my', 'baobabs your fox', 'fox his ears they look', 'look a bit like horns and they re too long', 'long and then he laughed again', 'again you aren t being fair my', 'my little fellow i don t know', 'how to draw anything except boa constrictors', 'constrictors closed and open', 'oh it ll be ok', 'said children understand', 'so i made a pencil', 'pencil sketch of a muzzle and i felt', 'felt a pang in my heart as i gave it', 'to him you have plans', 'plans that i don t know about', 'but he didn t respond', 'said to me you know', 'my descent to earth', 'earth tomorrow will be its anniversary', 'anniversary then after', 'a silence he went on', 'i came down very near here', 'and he blushed', 'and once again without understanding', 'understanding why i felt', 'a peculiar sense of sorrow', 'one question occurred', 'to me however so', 'it wasn t by chance that the morning', 'i first met you a week ago', 'you were out walking like that', 'all alone a thousand miles', 'miles from any inhabited region', 'you were going back to the place where you', 'landed the little', 'prince blushed again and', 'i added hesitantly', 'perhaps because of the anniversary', 'the little prince', 'prince blushed once more he never', 'never answered questions but', 'when you blush that means yes', 'doesn t it oh', 'oh i said to him i m worried', 'worried but he responded', 'now you must work you', 'must go back to your engine i', 'will wait for you here come back tomorrow', 'tomorrow evening but i', 'wasn t reassured i remembered', 'remembered the fox you run', 'run the risk of weeping a little if', 'you allow yourself to be tamed']

  predictions = ['got pacific blowing fashion land dwellings', 'reached disturbed ah metallic throw', 'glance creature conference hours dangerous', 'concerned invisible entirely absence reviving handsome regret', 'appears indeed bear normal proud tale', 'fond etiquette wasted understood thundered point obey scarf plummeting', 'surely near weeds today thursday forehead lowered', 'weigh down apologize pulley boxes', 'wake sixty bite vanity barren', 'pieces moon serious science trip source gone handkerchief', 'chickens days shot receives echo', 'proof naive obedience concluded different radiance', 'along softly among roots wings', 'minister bees birds humiliated hurrying roads surprised', 'reassured boys poetic drama confessed business golf spotted', 'thing', 'approach bed fifth distance grass safe', 'improved island raised bite herein scholars million dont', 'nine different rule suggested watched requires indies', 'spare army pieces nice recount problems squares words', 'tour neckties obeyed abyss', 'fat suffice further remarked tragic sky drinkers', 'doubt tiny footsteps diamond odds revolution urgency wings unique', 'based die problems wasted ink hesitated belongs hours dispatch', 'lazy cough coughed railing rheumatism hour', 'concerned minute help streetlamp', 'tightly sum minute shut seas', 'dress name spring arms alone forgiveness etiquette', 'astounded proud opened based lots serious nearer foolish full', 'ermine importance surely tigers astounded rumpled ah', 'apologize hammer thanks living surely urgency sponged', 'stake sometimes alive lives appear essential ephemeral chance', 'minds confuse point promptly', 'wings someone intelligent based indeed read strange', 'disappointed golden wealth alive flock', 'remembered strolling upright humiliated chimney excuse', 'ten breakfast lowered knowing cabin bottles', 'upset wonderful honest asking stylishly everybody', 'best anymore stand leisure secretly', 'sea decides men seventh offer worry consulted', 'effort solve reassured bed hair chance wings became', 'caterpillars rightly end screen robe recording falling engineer', 'convenient thinking protect absent', 'powerful rise dug impressive music completely ever embarrassed', 'restrain square attention behind sitting', 'tightly tames daydream based adornment sketched huge ambassador', 'neighborhood interrupted obey companions wise thunder sun', 'simplicity exists sharpened universe opposite double', 'showing brought gentleman grass brought silent meters weed', 'danger lead details contact squashing abashed', 'objects reasons almanac sadly pacific land drank', 'states actually crazy almanac bite destroy', 'disclosure unique smallest fashion cigarette strap', 'finds underground thursday miraculous monarch form crazy noise', 'wrapped whether roads wind understood conditions impressive', 'africa watched attack', 'wings torment intoxicated heating suffer forever', 'led shoulders near flow', 'advise passing disappeared problems reach sole job', 'used large legs drawn suffer treat reach offer', 'needed none ermine soil double mistake rag', 'dwellings sweetness decided doll killed', 'tour listen bank realised determinedly', 'seventy vineyards chance shelter rite middle minded', 'owns imposed sprinkling wore beg colleague nearer', 'coughed conference pointing count revolution onto', 'hair creatures received geographers truly rusty', 'play comforted send colleague', 'breaking music wander kind departure plucked', 'sponged meters wisely shouldn laughs caravan', 'engineer cabin bushes electricity recorded reach', 'linger stone killed hurrying slept revolution', 'condemn happy ambassador diamond fairly loves', 'blowing drew someday remember faithful', 'explanations colour loosened dear', 'being kindness lamps remembered sends stop mushroom', 'tightly humiliated visiting grieve cause else', 'absence emerge ordinary peace nine', 'best snapped exists purpose concerned', 'surprised worked stared bear majestic bread', 'seed lots double sunrise risks afar', 'noses risks puzzled detests', 'sputtered sponged point die', 'islet footsteps point consulted suffice tracks beauty now', 'problems shall mantle hours finger', 'judged dirty knows conference shrug eats dressed seas noses', 'somebody grumpily hastily poor', 'speaking strolling occasion think', 'run disclosure creature hesitated dwellings', 'forty discover least thunder', 'relentlessly caught businessmen gesture friends', 'discipline lies foolish leant', 'obvious companions wake case', 'public onto wasn walked breakfast', 'pulley trees happiness stage empties', 'tools whatever admit asleep', 'petals stage follows lack inflict stone disturbed', 'sadly play seventy fairy point stared', 'gold restless locomotive guess regret caterpillars flat', 'pointed wise continents drinkers improved', 'sitting quite relentlessly rare watch remorse normal volcano', 'cant begun understands shelter god bringing passes drawn', 'second convinced salute seeds majestically wonderful', 'understood recount sheltered south', 'find interest sweetly disappear frozen lock', 'provide thursday inquiry despite leading blowing window', 'completely recognise saluted lonely watch quietly', 'asteroids stroke firmly proves', 'pleasure crossed seem wasn ashamed explain', 'received advised shrugging black interested hurry either', 'remained forever o since', 'contradictory level timidly sum pace begun', 'ankle beware lightly', 'saying studied obedience froze ten between', 'thunder confidences level stylishly sixth plunged being second grew', 'thinks sunlight destroy record fingers', 'became wander haven respected', 'presentable serves knees hand further misunderstandings saluting daybreak movements twice', 'grumpily concluded suggested important', 'lowered observe flies side obviously passer ugly sometimes horns', 'late shall seventh rich given', 'subjects deep drifted lowering spray', 'patient attention odd passer nondescript somewhere trick', 'naively catch dear watches missed', 'tended urgency intelligent radiance side africa', 'bread making engineer coming remembered', 'mustn add gradually subjects record astronomy square rocked', 'hard dispatch handsome lies thundered pieces', 'decides falls absurd', 'balderdash normal sprouts mind pulled', 'foxes courteously motionless already boys lie sharpened similar play', 'absurd taken forgetting our', 'rightly crazy pointed readied bored leisure', 'unlike longer frozen absent body', 'stake am opposite problems', 'write obey squash intelligent wouldn rock recognise soon familiar', 'existed demand recognised weeds questioned natural escape longer businessman hunters', 'father chimney happier yawned eruptions', 'firmly siberia angry owning brisk determinedly squashing stroke going railing', 'fires hours', 'inflict excuse sum else secretive disappointed', 'skipped unique laugh scatterbrain help worlds sire snapped', 'math full complicated quietly', 'living dear made today gentleman besides reminded', 'larger god', 'dead golf sight waited indies everybody', 'recounting requires springs pulled shell', 'dumbfounded somehow exchange rite dwellings falls vineyards start upon army', 'easily hammer tames conference shoulders bonds foot', 'soon horns regularly within mine', 'object blowing dead leads recognised suffering', 'forever rifle simplicity', 'big responded wheat smiled meeting ankle doves arranged', 'admirers herd grease made rare', 'needed rejected realised intrigued during within', 'snapped kind name problems bells god mind', 'stone game inside herself told passes', 'must impressive attack return noise sum', 'fifteen similar snow exchange breeze o', 'despite understood intelligent nearer nearer humiliated wasn rag', 'torment points pacific lead bees', 'frightful apparent shuts express guides rat none', 'groomed seabird finds moment', 'belonging past detests dont trick slept problem best', 'sole squashing bored full fasten', 'none late born wake living plants slipped alight', 'leads haven mine happened bothersome', 'astounded ship treat grew powerful whether coil allow', 'rumbling favourable dressed reassure scatterbrain cause stop', 'south yawned form cool swell confidences naively', 'power muffler spoke eyes froze creature fifty readied', 'shut neckties eyes regretting interrupted siberia', 'passes tears squares tolerate torment short', 'problem demand becomes telling fairly solve acclaim', 'sweet millions minds leads girls guides hair boy pieces respectfully', 'blowing sprinkling killed belong belongs god dont', 'embarrassed alight universal peace drifted', 'changes improved drawer pure streetlamp road stopped', 'sends closely itself bolt reminded nice stirred', 'arrived suffice normal proud common questioned', 'retorted slowly bees catch ship international', 'glimpse whoever lonely arranged alive', 'discipline emerge bridge locomotive comforted regulated disclosure tames crying', 'recount asteroids etiquette tames', 'window beg sweep puzzled creature abyss scarf', 'experiment repeated pardon needed', 'importance absence yawned wishes minds sprouted sprinkling', 'reign passes drawer', 'mushroom public wake arranged shot manage', 'owns regretting ten sigh', 'pause remember fingers ephemeral vineyards lie huh enter bank', 'irreparable majestic extraordinary caravan today softly lasts', 'hastened doll obedience homesick dumbfounded form thinking', 'snapped kind else recording suggested robe reviving', 'effort surely worrying shells', 'forgetting lesson clapped off simple character impressive', 'allow belong laughs plummeting courteously inquired shock bud', 'favourable rheumatism rites men watching', 'importance somewhere questions getting giving getting maintain hanging', 'show fountain migration cried wait ate', 'opera astronomy along despised', 'gesture rusty dirty astronomy rumpled', 'sunlight reassure pulley repeats crazy caught', 'talking scatterbrain vineyards goodbye blowing lying heard', 'watching likes upright suffice brilliantly crazy slipped', 'enquired heard across result golf modest sprout', 'entirely judged hearing revolution fold couldn', 'astonished brought ram least linger box vexed', 'dispatch wish stand falls laughed frozen none', 'railing coil clearly contrivances respectfully closely', 'may prettiest fifteen comforted maintain', 'quietly somewhere universe sprouted mantle', 'snow handsome pole cabin taking needed', 'kill golf dismantling trick trees common round leather', 'rare based gradually pick', 'reply dress saying doubt thoroughly', 'politely previous bud businessmen huge', 'pain shoots father skies began dictator kill', 'altogether suffice huge admit hand dispatch', 'filled neck shut conceited prettiest', 'strolling rumpled leading needed admit stroke pain crying saying', 'foolish sputtered forgotten', 'heart politics noise protect laughs colour', 'hammer whether mean makes weed tames', 'gold someday page lie tightly', 'cool humble', 'roof volcano pure mantle risks advised pride', 'cried belongs reminded memories', 'states wondering hours', 'muzzle politely listen scarf truly colour insubordination rumpled', 'send waited dumbfounded pain rocked show', 'politics scholars watches doll played mushroom volcanic', 'angry dare tightly happening packs', 'imposed brothers springs', 'aroused enquired recounting wells legs drinkers fun arrived', 'siberia heard dream', 'result tired deep pulley wind', 'shell packs maybe monotonous loved grease huge kinds drank silly', 'exists tended turkish ballet seventy visiting', 'woken questioned shuts shot angry besides demand', 'conference worrying became imposed dream squash shell', 'ordinary pour softness finger books drank star', 'crying month slept capable', 'describe window rifle franc sunlight', 'secret laughs show', 'waited opera wasn dangerous hearing', 'lowered froze appearance opera bringing needles emerge authority', 'indies glanced reassured blowing similar saying thunder ambassador instant laughs', 'room pocket objected speaking laughing', 'properly arrived squares worried changed geraniums embarrassed', 'majestic redid decorated page asteroids', 'advantage bothersome information meters speak', 'kinds motionless describe bees', 'neighborhood grow lucky bed someday lowering', 'watch ship peace', 'likes forgot precious quiet island', 'tames faithful nondescript complete sat electricity', 'gold drinking hard', 'ambassador volcanic boys lies ink', 'colour readied creature result', 'dance treasure obey falling disturbed', 'needed shall disappointed', 'siberia growing waits spins', 'red fellow sprout held monotonous footstool danger gone', 'restless noise guns even animals movements nice', 'moving side simple others crossed capable', 'tames spoke pulley absurd spot', 'goodbye village chimney twilight crossed', 'squashing object improved embarrassed stood noses started memories today', 'whether near drifted', 'hastily wells army franc', 'caught forehead mix', 'unhappy exchange round friend hair', 'possible firmly find', 'square forever found disappointed empties grass', 'your dictator wake bees', 'faithful necessary experiment vexed proud asks', 'taken decided vexed greet', 'poison strides bridge found page motionless imposed motionless', 'showing miraculous missed glance', 'noise drawer glanced sponged hunters', 'patent determinedly deep understood tale opened patent', 'show remained breeze ashamed travelled geographers completely hair', 'linger remained take dream', 'administer either fun afar helps', 'road rise huh conference boys', 'havoc today homesick somewhere shelter angry forever imposed', 'strides recorded continued pace lightly absent']



  return preproc_sentences,predictions

# run metrics


In [7]:
  preproc_sentences_base, gen_sentences_base = load_baseline_pred()


In [8]:
len(preproc_sentences_base)

288

In [9]:
len(gen_sentences_base)

288

In [None]:
# preproc_sentences_base = preproc_sentences_base[:10]
# gen_sentences_base = gen_sentences_base[:10]


In [10]:
df_metrics_baseline = create_metrics_df(ground_truth=preproc_sentences_base, predicted=gen_sentences_base, include_bert_score=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


calculating scores...
computing bert embedding.


  0%|          | 0/9 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/5 [00:00<?, ?it/s]

done in 218.53 seconds, 1.32 sentences/sec


In [11]:
df_metrics_baseline.to_csv('bayesian_metrics_per_participant_58.csv')

In [12]:
df_metrics_baseline.shape
1120*49


54880

In [13]:
df = df_metrics_baseline.describe()

In [14]:
df.to_csv('bayesian_metrics_per_participant_58_stats.csv')

In [15]:
df

Unnamed: 0,rouge1,rouge2,rougeL,bleu,meteor,wer,bert_score
count,288.0,288.0,288.0,288.0,288.0,288.0,288.0
mean,0.0,0.0,0.0,7.894354999999999e-234,0.001326,1.0,0.506375
std,0.0,0.0,0.0,0.0,0.010178,0.0,0.06363
min,0.0,0.0,0.0,0.0,0.0,1.0,0.305072
25%,0.0,0.0,0.0,0.0,0.0,1.0,0.463046
50%,0.0,0.0,0.0,0.0,0.0,1.0,0.515209
75%,0.0,0.0,0.0,0.0,0.0,1.0,0.551161
max,0.0,0.0,0.0,1.28823e-231,0.1,1.0,0.654952
