In [12]:
!pip install rouge-score
!pip install bert-score
!pip install jiwer

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=ea11ffb534ba9c6f377235e318868c239fd5c391ff637238a604165a74a27f88
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2
Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: bert-score
Successfully installed bert-score-0.3.13
Collecting jiwer
  Downloading jiwer-3.0.3-py3-none-any.whl (21 kB)
Collecting rapidfuzz<4,>=3 (

In [13]:
!pip install nltk



In [14]:
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import single_meteor_score
from rouge_score import rouge_scorer
# from baseline_lpp.baseline import run_baseline
# from utils.paths import *
from jiwer import wer
from bert_score import BERTScorer
import socket
from urllib3.connection import HTTPConnection
import nltk

In [15]:
nltk.download('wordnet')


[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [16]:


"""
BERTScore (https://arxiv.org/abs/1904.09675)
"""
class BERTSCORE(object):
    """
    copied from https://github.com/HuthLab/semantic-decoding/blob/main/decoding/utils_eval.py
    """
    def __init__(self, idf_sents=None, rescale = True, score = "f"):
        self.metric = BERTScorer(lang = "en", rescale_with_baseline = rescale, idf = (idf_sents is not None), idf_sents = idf_sents)
        if score == "precision": self.score_id = 0
        elif score == "recall": self.score_id = 1
        else: self.score_id = 2

    def score(self, ref, pred):
        ref_strings = [" ".join(x) for x in ref]
        pred_strings = [" ".join(x) for x in pred]
        return self.metric.score(cands = pred_strings, refs = ref_strings,verbose=True)[self.score_id].numpy()


def create_metrics_df(ground_truth: list[str], predicted: list[str], include_bert_score = False) -> pd.DataFrame:
    """
    calculates metrics
    :param ground_truth:
    :param predicted:
    :return:
    """
    # set higher timeout mb to prevent connection timeout when downloading pytorch_model.bin
    HTTPConnection.default_socket_options = (
            HTTPConnection.default_socket_options + [
        (socket.SOL_SOCKET, socket.SO_SNDBUF, 2000000),
        (socket.SOL_SOCKET, socket.SO_RCVBUF, 2000000)
    ])

    # Initialize ROUGE scorer
    ROUGE_SCORER = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    if include_bert_score:
        BERT_SCORER = BERTSCORE()
    # Initialize variables to accumulate scores
    rouge1_scores = []
    rouge2_scores = []
    rougeL_scores = []
    bleu_scores = []
    meteor_scores = []
    wer_scores = []
    # bert_scores = []

    # Loop through each pair of ground truth and predicted sentences
    for gt, pred in zip(ground_truth, predicted):
        # Tokenize the sentences
        gt_tokens = gt.split()
        pred_tokens = pred.split()

        # Calculate ROUGE scores
        rouge_scores = ROUGE_SCORER.score(' '.join(gt_tokens), ' '.join(pred_tokens))
        rouge1_scores.append(rouge_scores['rouge1'].fmeasure)
        rouge2_scores.append(rouge_scores['rouge2'].fmeasure)
        rougeL_scores.append(rouge_scores['rougeL'].fmeasure)

        # Calculate BLEU score
        bleu_score = sentence_bleu(gt_tokens, pred_tokens)
        bleu_scores.append(bleu_score)

        # Calculate METEOR score
        meteor_score_value = single_meteor_score(gt_tokens, pred_tokens)
        meteor_scores.append(meteor_score_value)

        # Calculate word error rate (WER)
        wer_score = wer(reference=gt,hypothesis=pred)
        wer_scores.append(wer_score)

    # TODO: add BERTScore
    # add bert score if specified
    if include_bert_score:
        bert_scores = BERT_SCORER.score(ref=ground_truth, pred=predicted)
        # bert_scores.append(bs)

    df_scores = pd.DataFrame(
        {
            'gt': ground_truth,
            'pred': predicted,
            'rouge1': rouge1_scores,
            'rouge2': rouge2_scores,
            'rougeL': rougeL_scores,
            'bleu': bleu_scores,
            'meteor': meteor_scores,
            'wer':wer_scores,
            'bert_score': bert_scores

        }
    )
    return df_scores


# def save_baseline_metrics(include_bert_score:bool=False):
#     preproc_sentences_base, gen_sentences_base = run_baseline()
#     df_metrics_baseline = create_metrics_df(ground_truth=preproc_sentences_base, predicted=gen_sentences_base, include_bert_score=include_bert_score)
#     filename = 'baseline_metrics.csv'
#     save_path = os.path.join(eval_path, 'metrics')
#     file_path = os.path.join(save_path, filename)
#     if not os.path.exists(save_path):
#         os.makedirs(save_path)
#     df_metrics_baseline.to_csv(file_path, index=False)
#     print(f'saved {file_path}')

# if __name__ == "__main__":
#     save_baseline_metrics(include_bert_score =True)
#     print('done')


# load baseline pred


In [18]:
def load_baseline_pred():
  preproc_sentences = ['good morning said the little prince', 'good morning said the merchant', 'merchant this was a merchant', 'who sold pills that had been created', 'to quench thirst you could take', 'take one pill a week and you no longer', 'longer felt the need to drink anything', 'why are you selling those', 'asked the little prince it', 's a big time saver said the merchant', 'merchant experts have done calculations', 'calculations you save fifty three minutes', 'minutes per week and what', 'i do with the fifty three minutes', 'you can do anything you like with them', 'myself', 'the little prince said to himself', 'if i had fifty three minutes to spend', 'as i liked i d walk very', 'very slowly toward a spring of fresh water', 'water we were at', 'the eighth day since my accident in', 'the desert and i d listened to the story', 'story of the merchant as i drank the last', 'last drop of my water supply', 'ah i said to', 'the little prince these memories', 'memories of yours are very charming but', 'i haven t managed to repair my plane yet', 'yet i have nothing left to drink', 'and i too would be happy if', 'if i could walk slowly towards a spring', 'spring of fresh water', 'my friend the fox he said to', 'to me my dear fellow', 'our situation has nothing to do', 'with the fox anymore why not', 'not because we will die of', 'thirst he didn t follow', 'follow my reasoning and he answered me', 'it s nice to have had a friend', 'friend even if you re about to die', 'myself i m glad', 'to have had a fox as a friend', 'he never considers the danger', 'danger i said to myself he s never', 'never been hungry or thirsty a little', 'little sunshine is all he needs', 'but he looked at me and replied to', 'my thought i m also thirsty', 'thirsty let s look for a well', 'i made a gesture of weariness', 'weariness it s absurd to look', 'for a well at random in the immensity', 'immensity of the desert but we started', 'started walking anyway', 'when we had walked for hours', 'in silence night fell', 'and the stars began to come out', 'i saw them as if in a dream', 'as my thirst had made me feverish', 'the little prince s words', 'words danced in my memory', 'so you re also thirsty i asked', 'asked him but he didn t reply', 'to my question he said simply', 'simply water can be good for', 'the heart too i', 'didn t understand his answer but', 'i said nothing i knew better', 'better than to press my questions', 'he was tired he sat down', 'down i sat down beside him', 'him and after a silence', 'silence he spoke again', 'the stars are beautiful because of a', 'flower that can t be seen', 'i replied that s TRUE', 'TRUE and i looked without', 'without saying anything at the folds', 'folds of sand in the moonlight', 'the desert is beautiful', 'the little prince added', 'and it was TRUE i have always loved', 'loved the desert you sit', 'sit down on a sand dune you see nothing', 'nothing you hear nothing', 'and yet something radiates', 'radiates forth in the silence', 'what makes the desert', 'desert beautiful said the little', 'prince is that somewhere', 'it hides a well', 'i was surprised to suddenly', 'suddenly understand this mysterious radiation', 'radiation of the sands', 'when i was a little boy i', 'lived in an old house and', 'legend told that a treasure was buried', 'buried there of course no', 'one had ever been able to find it', 'or perhaps no one had even looked for', 'it but it cast an enchantment', 'enchantment over that house', 'my home was hiding a secret', 'secret in the depths of its heart', 'yes i said to the little', 'prince whether the house', 'house the stars or the desert', 'what gives them their beauty is something', 'something invisible i m', 'glad he said that you agree', 'with my fox', 'as the little prince fell asleep', 'asleep i took him in my arms and set', 'out walking again i felt', 'felt deeply moved it', 'it seemed to me that i was carrying a very', 'very fragile treasure it', 'it even seemed to me that there was nothing', 'nothing more fragile on earth', 'i looked in the moonlight', 'moonlight at his pale forehead his closed', 'closed eyes his locks of', 'hair that trembled in the wind', 'and i said to myself', 'what i see here is only a shell', 'shell that which is most important', 'important is invisible', 'as his slightly parted lips', 'lips gave way to a half smile i continued', 'continued what i find', 'so deeply moving about this little', 'sleeping prince is his devotion', 'devotion to a flower', 'it s the image of a rose that shines', 'shines in him like the flame of a lamp even', 'even when he s sleeping', 'and i came to think of him as even more', 'more fragile', 'one has to look after lamps', 'a gust of wind can put them out', 'and continuing to walk', 'walk i found the well at daybreak', 'daybreak men', 'men said the little prince stuff', 'stuff themselves into express trains', 'trains but they don t know what they re looking', 'for so they rush about and go', 'in circles and he added', 'added it s not worth it', 'the well we', 'had reached wasn t like the other wells', 'of the sahara the wells', 'of the sahara are mere holes', 'holes dug in the sand this one', 'one looked like a village well', 'but there was no village there', 'and i thought i was dreaming', 'it s strange i said to the little', 'prince everything s been prepared', 'prepared the pulley the bucket and the', 'rope he laughed took', 'the rope and put the pulley to work', 'work and the pulley moaned', 'moaned like an old weathervane when there has', 'has long been no wind', 'can you hear that said the little prince', 'prince we ve woken up the well', 'well and it s singing i didn', 'didn t want him to tire himself out', 'let me do it i said', 'it s too heavy for you', 'i hoisted the bucket slowly to the', 'the edge of the well and set it down good', 'good and level the song of the', 'pulley continued in my ears', 'ears and in the still trembling water', 'water i could see the sunlight shimmer', 'shimmer i m thirsty for this', 'this water said the little prince', 'give me some to drink', 'and i knew then what he d been looking', 'looking for i raised', 'the bucket to his lips he drank', 'drank his eyes closed', 'it was as sweet as some special', 'special festival treat', 'this water was something very different', 'different from ordinary nourishment', 'it was born of the walk under the stars', 'stars of the song of the pulley', 'pulley of the effort of my arms', 'it was good for the heart like', 'like a present when', 'i was a little boy the lights', 'of the christmas tree the music of the', 'the midnight mass the tenderness', 'tenderness in the smiles produced in a similar', 'similar way the radiance of the', 'gift that i received', 'the men where you live', 'said the little prince grow five', 'five thousand roses in a single garden', 'garden and they don t find what', 'they re looking for in it they', 'don t find it i replied', 'and yet what they re looking for', 'for could be found in a single', 'rose or in a little water', 'that s TRUE i said', 'and the little prince added', 'added but the eyes are blind', 'blind you have to search with the heart', 'i had drunk the', 'the water i breathed easily', 'easily the sand at sunrise', 'sunrise is the color of honey this', 'this honey color was also making', 'making me feel good why', 'why then did i have to have this sense', 'of grief you', 'have to keep your promise said', 'the little prince softly who had', 'again sat down beside me', 'what promise', 'you know a muzzle for my sheep', 'sheep i m responsible', 'for this flower', 'i took my sketches out of my pocket', 'pocket the little prince saw them', 'and laughed as he said your baobabs', 'baobabs they look a bit', 'like cabbages oh', 'and i d been so proud of my', 'baobabs your fox', 'fox his ears they look', 'look a bit like horns and they re too long', 'long and then he laughed again', 'again you aren t being fair my', 'my little fellow i don t know', 'how to draw anything except boa constrictors', 'constrictors closed and open', 'oh it ll be ok', 'said children understand', 'so i made a pencil', 'pencil sketch of a muzzle and i felt', 'felt a pang in my heart as i gave it', 'to him you have plans', 'plans that i don t know about', 'but he didn t respond', 'said to me you know', 'my descent to earth', 'earth tomorrow will be its anniversary', 'anniversary then after', 'a silence he went on', 'i came down very near here', 'and he blushed', 'and once again without understanding', 'understanding why i felt', 'a peculiar sense of sorrow', 'one question occurred', 'to me however so', 'it wasn t by chance that the morning', 'i first met you a week ago', 'you were out walking like that', 'all alone a thousand miles', 'miles from any inhabited region', 'you were going back to the place where you', 'landed the little', 'prince blushed again and', 'i added hesitantly', 'perhaps because of the anniversary', 'the little prince', 'prince blushed once more he never', 'never answered questions but', 'when you blush that means yes', 'doesn t it oh', 'oh i said to him i m worried', 'worried but he responded', 'now you must work you', 'must go back to your engine i', 'will wait for you here come back tomorrow', 'tomorrow evening but i', 'wasn t reassured i remembered', 'remembered the fox you run', 'run the risk of weeping a little if', 'you allow yourself to be tamed']

  predictions = ['similar great found heard destroy lying', 'bringing ruin enquired concerned happening', 'constrictor proves minded point means', 'guess purpose wrong grumpily wish among contrivances', 'universal mix acclaim revealed snow contact', 'woken poetic remained moving exchange disturbed motionless yourselves admit', 'conference power looking rat angry black majestically', 'price bud obey satisfied pour', 'mine sweet silly crying tasks', 'absence tried falling entry guns redid bulky sunlight', 'laughing odds butterflies remorse between', 'gesture silly spite discovery prepared bolt', 'reasons whatever noses above tracks', 'scholars intoxicated attention forehead handkerchief sell happening', 'walk deep caravan sum fifty sat retorted enquired', 'lesson', 'solve passer music saluted rest laughed', 'readied existed moving fifth sprinkling warn confessed lie', 'full prepared attention since drew foot disappearance', 'judged similar absence drinking recording lots plunged favour', 'walked rag spare discover', 'lucky along thunder carefully lucky listen minute', 'pulley cumbersome flow ideas fairly alive living tracks linger', 'belong making lots remained drawer lamplighters arms point register', 'show extraordinary majestic sheltered blowing wasn', 'guess during square remorse', 'concluded mushroom climbed faithful box', 'islet set engineer someday closer drink poetic', 'speak raised drank belong needles falling least reserve remembered', 'pacific reputation conference stared speak squares enquired', 'rich breeze puzzled electricity conference questions wrapped', 'recognise truly finally perfumed loved underground pulley unique', 'brothers visiting guns animals', 'shot lazy footstool belong mix arms bonds', 'start stroke ermine finally properly', 'shoulders chimney wondering forehead hunt problems', 'altogether asks majestically explain suffer sorrow', 'bank astonished horns decided tiny naively', 'fond presentable elephants tames advantage', 'led weigh whatever burst soon brilliantly itll', 'complete arrived homesick passer wise foxes shot wind', 'sitting worried whoever courage upset thundered thing weed', 'golf drifted become trouble', 'public remember stone subtle send most pull apparent', 'over courteously locomotive breathe laughs', 'concluded asking alike belong flock pure administer recitals', 'advise grammar motionless squash else besides stage', 'conditions hair disappear convenient eternal details', 'stay repeat stopped rest repeat sweet breeze millions', 'insubordination studied poor mountain stroke watered', 'work naive down waits phew destroy worked', 'heals ten proposed down fifth dress', 'squashing linger negro heard obvious responded', 'worst getting judging fires billion approach work feel', 'coil buy sell larger moving remember subtle', 'saluted softness direction', 'shot squashing confessed falling wide weren', 'fit shatter power guides', 'hastened improved fifteen may quietly owning miraculous', 'forgiveness doubt public rusty wide mind quietly pole', 'ugly seek pacific swell details reproaches kind', 'lying dictator cabin pole sweep', 'walked flies pause fountain obviously', 'imposed meeting unique telling repeats stopped peace', 'sharpened cool money neighborhood convinced ridiculous least', 'majestic false fires explanations alive fault', 'wise skies heating yawning crossed anymore', 'ballet boast firmly ridiculous', 'groomed send rage happiness lowered show', 'wrapped stared mind closely lasts dug', 'inquired suffer lock filled somebody quietly', 'maintain used everyone handkerchief oceans manage', 'ankle scholars impatient ideas reassured birds', 'opposite pace rheumatism capable universe', 'obedience child bridge promptly', 'cried lesson meters must bees quickly reproaches', 'concluded forehead reach contrary ideas poppies', 'firmly businessmen minister adore volcanic', 'fond stuck hair woken guess', 'happening became condemn acclaim afar record', 'nice needles details bends proves brought', 'changed proves hastily ready', 'simplicity wrapped guns spite', 'tree flow guns itll mustn shuts waste spare', 'lots tragic bewildered point present', 'scarf island turkish false consulted dressed reply herd beat', 'math forgetting tools changes', 'stroll chimney owns boys', 'likes squashing regretting short lying', 'lowered frozen form besides', 'comes sat bud forgot sire', 'imply extraordinary reserve off', 'shell spot mine wealth', 'forgotten fault portraits forever asks', 'far spray goodbye sprouts indeed', 'likes havoc herself may', 'empties sprouts save gathered tamed used ruin', 'waits ballet imposed humiliated guns condemn', 'stop before making ordinary contrivances realising pleasure', 'coughed else window sharpened readied', 'laughs travel comes knees trick snake revealed understands', 'pain discipline afraid telling sends pointed surely today', 'regularly monarch happened astonished drama giving', 'astronomy recording recollections temples', 'quiet end america fairy forty leading', 'yawning judging rifle nevertheless necessary opposite vexed', 'most forsaken reply double trick linger', 'creature pointing twice recollections', 'deserts past got result brothers roots', 'heating france wall geographers radish simple ocean', 'puts tale chance moistened', 'alone worth ask mushroom bushes forgive', 'seventh wouldn softly', 'listen dated spite throw altogether falls', 'sweetly better worth fold somehow bridge goodbye regularly lie', 'confidences mistake snakes determinedly strides', 'absent rage blessed tasks', 'powerful glance during underground thunder sweetness veritable sprout upon overwhelmed', 'forgetting franc since apologize', 'object sadly side strolling stunned wounds direction truly lowering', 'lock recognise bracelet curiosity sadly', 'next names scarf provide chasing', 'worrying rocked cleaned wounds repeated nondescript miraculous', 'watches rusty promptly sprout drawn', 'objected questions explorers given rag saluted', 'record etiquette inquired business must', 'plummeting second secretive states determinedly rite courteously living', 'favourable register among extraordinary exchange absence', 'weigh courage travelers', 'motionless revealed land foot simplicity', 'siberia mustn effort danced hand shoots step moon ballet', 'travelers being breaking lighters', 'worried proposed coughed despised skies absent', 'lighters interested forty judged frightful', 'homesick game eternal trees', 'watching motionless sense footstool recount ephemeral forsaken rock price', 'butterflies kingdom appearance patient urgency loosened public interested larger amuse', 'reviving irreparable quite suggested science', 'shrub behind subjects sight lack reasons stroke pointing continued sheltered', 'bear point', 'tamed problems negro poppies brilliantly start', 'lonely linger interesting torment square together trip shouldn', 'confessed remembered locomotive linger', 'electricity promptly luck under stopped bed late', 'lit sends', 'response longer peace coming mouth naively', 'onto foot recognised simplicity wings', 'going appears humanity repeats lying courage meeting watching almanac similar', 'seventy breeze restless false grew empties adornment', 'rock lowering south swell based', 'roads opposite response scatterbrain appearance humble', 'tale serious conditions', 'rather imagination tale leads satisfied seventh patent mainly', 'name grumble thursday luck knees', 'stay twice properly pulled caravan swell', 'stuck happiness set lots everybody sends streetlamp', 'used actually shut pride whim shouldn', 'sheltered owns twelve disappeared becomes mushroom', 'sharp moon feel rites understood chance', 'nevertheless moving explorers least bracelet forehead portraits kind', 'squashing shelter great studied intoxicated', 'beauty understands despite sixth engine relentlessly seek', 'gold different worst dirty', 'raised exist ready lie miraculous across die fond', 'owning stroke skies passing game', 'majesty disclosure bother mine electricity lock none passing', 'scatterbrain blessed knowing passes gives', 'allow comforted mind explorers stake buy normal astounded', 'dress ballet inquiry changed torment ideas quickly', 'temples suggested wasted leisure seems sad watches', 'diamond shock stunned falls throw regretting recounting despised', 'faithful rag falls lights taking behind', 'surely opinion sixty leant fifty witnessed', 'drawn kingdom disappointed beg explained arrived clearly', 'return exercise someone scatterbrain game engine wise object absence flat', 'found money sweep leather reminded sends lie', 'weren passing pleasure problems sea', 'strap mainly islet watch angry sheltered lie', 'bees intrigued alright hurry sponged drinking puzzled', 'ink mustn revealed snow kindness urgency', 'facing frightful hanging rusty comforted appeared', 'hurry depend double mainly lead', 'imply businessmen suddenly foxes boast mantle squashing restless further', 'fellow creature poetic restless', 'knew convinced thanks hastily regretting discover yourselves', 'silly thorns regularly stay', 'reputation firmly suggested points brothers springs money', 'talked surely islet', 'reproaches forgotten mine mainly grieve realised', 'sharpened lights altogether admit', 'caterpillars capable strides pulley meeting shoots describe travelled pause', 'bread patent remarked dug rat enter hurrying', 'drifted stand spite questioned going approach drifted', 'shouldn happiness poppies thundered reassure upset wish', 'complete conference monotony third', 'together reached snapped misunderstandings near huge owns', 'astounded leather lasts admit mustn detests often shells', 'fat blowing neckties whatever heart', 'reputation nondescript changes steps travelled deep wishes produce', 'trains convinced africa colleague watched froze', 'instantly rite reasons vineyards', 'forgot apologize island rite squares', 'belongs changed sailor round proposed sat', 'lives torment meeting relentlessly opposite aroused easy', 'waited watch wondering along burrow proposed whether', 'cabin easy demand dear retorted handsome presence', 'proud scarf recount alive fourth speaking', 'blushed nor fingers balderdash maintain bells departure', 'clean defend elephants tour lamps forty instant', 'sheltered body despised command flat golden', 'patent bread serves boast unhappy', 'saying nondescript convenient disappeared bewildered', 'lead among half suffer tightly ugly', 'belonging retorted objected miraculous spray kindness idea behind', 'knees belong secretive secretive', 'apparition islet isn kill grass', 'questioned burrow shells bud administer', 'step studied successful acclaim tools yawn belonging', 'dance along administer advised entertainment clean', 'plucked honest adornment brisk invention', 'chimney squares necessary ugly advised talking step further listen', 'reserve simplicity burn', 'recorded shrugging becomes obvious lasts child', 'breeze sprinkling science european opened asking', 'stop invention sprouted shoots concluded', 'leisure clean', 'different understands worth bewildered proves lights irreparable', 'colleague reminded late entirely', 'heals gravely point', 'retorted questioned flies yourselves crossed child patient squares', 'yawned coming bigger step big trains', 'information tears colour stand sum reproaches flash', 'subjects politics concluded hesitated receives', 'cool decides catch', 'glad father onto bees thoughts sharpened knew right', 'behind easy inquired', 'dear born pointing satisfied larger', 'wings receives packs sunsets pulled thursday count spring worked leapt', 'hair objected happiness plunged imposed reach', 'offer urgency despite stood subjects bed kingdom', 'false monotony absent cool redid sense wings', 'secret worlds cigarette present add muzzle upon', 'granite nearer across infested', 'killed vexed serious seem belongs', 'stylishly lasts trains', 'coming instantly portraits hair recount', 'majestically throw discipline silent pointed absolute businessmen monotony', 'suggested leads homesick opposite moon isn sweetly recitals problem lasts', 'afar volcanic recognised stroll present', 'treasure ink alight understood grave realised apparent', 'afar window regulated sprouted creature', 'music gives point breeze travelled', 'thinking follows killed hanging', 'advise cause excuse passer rheumatism deeds', 'trick attack problems', 'watch repeat run often existed', 'restless universe shell eleven shoulders filled', 'stop houses truth', 'impatient flash hand extraordinary intelligent', 'child chance regretting dear', 'granite rites motionless thing ruin', 'ugly recognise start', 'room strap sixth roots', 'exchange buy presence common id lucky insubordination bulky', 'before becomes told impressive justice missed upright', 'nice strolling hastened watching past infested', 'asking stunned rocked travelers inquiry', 'relentlessly contradictory irreparable changes past', 'stroke eager readied apparent glanced beat footsteps condemn rat', 'sprinkling power sea', 'tools bees similar effort', 'sat black leaning', 'empties humanity laughing muffler wise', 'ship defend quiet', 'courteously tale short start spring rest', 'wait windows mine hanging', 'universe loves silly timidly snow russia', 'grew cabin timidly eye', 'page tree suddenly short sprouted effort etiquette seven', 'stay days drawn wealth', 'becomes hammer leads wrapped anymore', 'waits golf names moving contact drank stage', 'trains moral understood brothers wells yawning most wise', 'maintain puts coil redid', 'common ocean knew whom showing', 'reputation remember describe minded hand', 'yawns rat questioned nondescript telling subjects tale cool', 'approach m seas bushes kindness trouble']


  return preproc_sentences,predictions

# run metrics


In [19]:
  preproc_sentences_base, gen_sentences_base = load_baseline_pred()


In [20]:
len(preproc_sentences_base)

288

In [21]:
len(gen_sentences_base)

288

In [None]:
# preproc_sentences_base = preproc_sentences_base[:10]
# gen_sentences_base = gen_sentences_base[:10]


In [22]:
df_metrics_baseline = create_metrics_df(ground_truth=preproc_sentences_base, predicted=gen_sentences_base, include_bert_score=True)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


  0%|          | 0/9 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/5 [00:00<?, ?it/s]

done in 193.16 seconds, 1.49 sentences/sec


In [23]:
df_metrics_baseline.to_csv('bayesian_metrics_per_participant_57.csv')

In [24]:
df_metrics_baseline.shape
1120*49


54880

In [25]:
df = df_metrics_baseline.describe()

In [26]:
df.to_csv('bayesian_metrics_per_participant_57_stats.csv')

In [27]:
df

Unnamed: 0,rouge1,rouge2,rougeL,bleu,meteor,wer,bert_score
count,288.0,288.0,288.0,288.0,288.0,288.0,288.0
mean,0.001707,0.0,0.001707,4.04183e-234,0.003642,1.0,0.505051
std,0.016961,0.0,0.016961,0.0,0.017572,0.0,0.064436
min,0.0,0.0,0.0,0.0,0.0,1.0,0.287034
25%,0.0,0.0,0.0,0.0,0.0,1.0,0.465724
50%,0.0,0.0,0.0,0.0,0.0,1.0,0.51545
75%,0.0,0.0,0.0,0.0,0.0,1.0,0.552705
max,0.2,0.0,0.2,1.164047e-231,0.125,1.0,0.660048
