In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Fri Apr 22 12:39:48 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
  print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
  print('re-execute this cell.')
else:
  print('You are using a high-RAM runtime!')

Your runtime has 27.3 gigabytes of available RAM

You are using a high-RAM runtime!


In [3]:
# install datasets
!pip install datasets

from datasets import list_datasets, list_metrics, load_dataset, load_metric

from pprint import pprint
!pip install torch
#!pip install -q pytorch-lightning
!pip install -q transformers


import transformers
from torch.utils.data import DataLoader, TensorDataset, random_split, RandomSampler, Dataset
import pandas as pd
import numpy as np

from transformers import BartForConditionalGeneration, BartTokenizer
from tqdm import tqdm
import torch


#import torch.nn.functional as F
#import pytorch_lightning as pl
#import torch
#from pytorch_lightning.callbacks import ModelCheckpoint

#import math
#import random
#import re
import argparse



In [4]:
# Downloading and loading a dataset
#dataset = load_dataset('cnn_dailymail', '3.0.0')

In [5]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=False)
root_dir = "/content/gdrive/My Drive/masters_thesis/"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [6]:
# Got from BART Training
!pip install -q pytorch-lightning
!pip install -q transformers
import torch.nn.functional as F
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import ModelCheckpoint

class LitModel(pl.LightningModule):
  # Instantiate the model
  def __init__(self, learning_rate, tokenizer, model, freeze_encoder, freeze_embeds):
    super().__init__()
    self.tokenizer = tokenizer
    self.model = model
    self.learning_rate = learning_rate
    #self.hparams = hparams
    self.freeze_encoder = freeze_encoder
    self.freeze_embeds = freeze_embeds

    if self.freeze_encoder:
      freeze_params(self.model.get_encoder())

    if self.freeze_embeds:
      self.freeze_embeds()
  
  def freeze_embeds(self):
    ''' freeze the positional embedding parameters of the model; adapted from finetune.py '''
    freeze_params(self.model.model.shared)
    for d in [self.model.model.encoder, self.model.model.decoder]:
      freeze_params(d.embed_positions)
      freeze_params(d.embed_tokens)

  # Do a forward pass through the model
  def forward(self, input_ids, **kwargs):
    return self.model(input_ids, **kwargs)
  
  def configure_optimizers(self):
    optimizer = torch.optim.Adam(self.parameters(), lr = self.learning_rate)
    return optimizer

  def training_step(self, batch, batch_idx):
    # Load the data into variables
    src_ids, src_mask = batch[0], batch[1]
    tgt_ids = batch[2]
    # Shift the decoder tokens right (but NOT the tgt_ids)
    decoder_input_ids = shift_tokens_right(tgt_ids, tokenizer.pad_token_id)

    # Run the model and get the logits
    outputs = self(src_ids, attention_mask=src_mask, decoder_input_ids=decoder_input_ids, use_cache=False)
    lm_logits = outputs[0]
    # Create the loss function
    ce_loss_fct = torch.nn.CrossEntropyLoss(ignore_index=self.tokenizer.pad_token_id)
    # Calculate the loss on the un-shifted tokens
    loss = ce_loss_fct(lm_logits.view(-1, lm_logits.shape[-1]), tgt_ids.view(-1))

    return {'loss':loss}

  def validation_step(self, batch, batch_idx):

    src_ids, src_mask = batch[0], batch[1]
    tgt_ids = batch[2]

    decoder_input_ids = shift_tokens_right(tgt_ids, tokenizer.pad_token_id)
    
    # Run the model and get the logits
    outputs = self(src_ids, attention_mask=src_mask, decoder_input_ids=decoder_input_ids, use_cache=False)
    lm_logits = outputs[0]

    ce_loss_fct = torch.nn.CrossEntropyLoss(ignore_index=self.tokenizer.pad_token_id)
    val_loss = ce_loss_fct(lm_logits.view(-1, lm_logits.shape[-1]), tgt_ids.view(-1))

    return {'loss': val_loss}
  
  # Method that generates text using the BartForConditionalGeneration's generate() method
  def generate_text(self, text, eval_beams, early_stopping = True, max_len = 40):
    ''' Function to generate text '''
    generated_ids = self.model.generate(
        text["input_ids"],
        attention_mask=text["attention_mask"],
        use_cache=True,
        decoder_start_token_id = self.tokenizer.pad_token_id,
        num_beams= eval_beams,
        max_length = max_len,
        early_stopping = early_stopping
    )
    return [self.tokenizer.decode(w, skip_special_tokens=True, clean_up_tokenization_spaces=True) for w in generated_ids]

def freeze_params(model):
  ''' Function that takes a model as input (or part of a model) and freezes the layers for faster training
      adapted from finetune.py '''
  for layer in model.parameters():
    layer.requires_grade = False

In [7]:
# using_custom = True
# global_model = BartForConditionalGeneration.from_pretrained("facebook/bart-base").to(device) 
# if using_custom:
#tokenizer = BartTokenizer.from_pretrained('facebook/bart-base', add_prefix_space=True)
#bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")
#global_model = LitModel.load_from_checkpoint(root_dir + "checkpoint_files_complete/setup2_training_model.ckpt",
#                                      learning_rate = 2e-5, tokenizer = tokenizer, model = bart_model, freeze_encoder = True, freeze_embeds = False)

In [8]:
#import re
import pickle
from datetime import datetime
# import copy
import csv

DEFAULT_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
#DEFAULT_DEVICE = "cuda"
def create_csv(all_sentences, targets, file_to_write):
  sources = []
  fieldnames = ["source", "target"]
  test_array = []
  with open(file_to_write, 'w') as csvfile:
    csvwriter = csv.DictWriter(csvfile, delimiter=',', fieldnames=fieldnames)
    for t in range(len(targets)):
        test_array.append({"source": all_sentences[t], "target": targets[t]})
        #writer.writerow({sources[t], targets[t]})
    csvwriter.writerow(dict((fn,fn) for fn in fieldnames))
    for row in test_array:
      csvwriter.writerow(row)


def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i : i + n]

def generate_summaries(lns, metric, batch_size=1, device=DEFAULT_DEVICE):
    #tokenizer = BartTokenizer.from_pretrained("facebook/bart-base") 
    model = BartForConditionalGeneration.from_pretrained("facebook/bart-base").to(device)
    #device = torch.device('cuda:0')
    tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
    
    article_batches = list(chunks(lns['source'], batch_size))
    target_batches = list(chunks(lns['target'], batch_size))
    ls_prediction = []
    ls_groundtruth = []

    dec_batches_untokenized = []
    target_batches_untokenized = []

    for article_batch, target_batch in tqdm(zip(article_batches, target_batches)
    , total=len(article_batches)):
        dct = tokenizer.batch_encode_plus(article_batch,
                                          max_length=1024,
                                          truncation=True,
                                          padding='max_length',
                                          return_tensors="pt")
        summaries = model.generate(
            input_ids=dct["input_ids"].to(device),
            attention_mask=dct["attention_mask"].to(device),
            num_beams=10,
            length_penalty=2.0,
            max_length=512,
            min_length=120,
            no_repeat_ngram_size=3,
            early_stopping=True,
            decoder_start_token_id=tokenizer.eos_token_id,
        )
        #summaries = model.generate_text(dct,3)
        dec = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summaries]  
        dec = [d.replace('. ', '.\n') for d in dec]

        dec_batches_untokenized.append(dec)
        target_batches_untokenized.append(target_batch)
        

        ls_prediction.extend(dec)
        ls_groundtruth.extend(target_batch)

    
    
    ls_prediction_tokenized = coreNLP_tokenizer(ls_prediction)
    target_batch_tokenized = coreNLP_tokenizer(ls_groundtruth)

    #for i in range(len(ls_prediction_tokenized)):
    # print(ls_prediction_tokenized[i])
    # print (target_batch_tokenized[i])
    # print('==============================')
    #print (ls_prediction_tokenized)
    #print(target_batch_tokenized)
    #dec_batches = list(chunks(ls_prediction_tokenized, batch_size))
    #target_batches = list(chunks(target_batch_tokenized, batch_size))


    #for dec_batch, target_batch in tqdm(zip(dec_batches, target_batches), total=len(dec_batches)):
    metric.add_batch(predictions=ls_prediction_tokenized, references=target_batch_tokenized)

    score = metric.compute()
    str_now = str(datetime.now())
    result_file_name = root_dir + "bart_pretrained_setup6_testing_model_generated_summaries_april_22_beam_10.csv"
    create_csv(ls_groundtruth, ls_prediction, result_file_name)
    #with open('/content/gdrive/Shareddrives/Informed Consent/202012_summarization_results/{0}_predictions.pkl'.format(str_now), 'wb') as fid:
    #    pickle.dump(ls_prediction, fid)
    #with open('/content/gdrive/Shareddrives/Informed Consent/202012_summarization_results/{0}_groundtruth.pkl'.format(str_now), 'wb') as fid:
    #    pickle.dump(ls_groundtruth, fid)
    #print("ls prediction: ")
    #print(ls_prediction)
    #print("ls groundtruth: ")
    #print(ls_groundtruth)
    return score

In [9]:
# Create a dataloading module as per the PyTorch Lightning Docs
class SummaryDataModule(pl.LightningDataModule):
  def __init__(self, tokenizer, data_file, batch_size, num_examples = 7000):
    super().__init__()
    self.tokenizer = tokenizer
    self.data_file = data_file
    self.batch_size = batch_size
    self.num_examples = num_examples
  
  # Loads and splits the data into training, validation and test sets with a 60/20/20 split
  def prepare_data(self):
    self.data = pd.read_csv(self.data_file).dropna()[:self.num_examples]

    self.data['source'].astype(str)
    self.data['target'].astype(str)
    self.train, self.validate, self.test = np.split(self.data.sample(frac=1), [int(.6*len(self.data)), int(.8*len(self.data))])

  # encode the sentences using the tokenizer  
  def setup(self, stage):
    self.train = encode_sentences(self.tokenizer, self.train['source'], self.train['target'])
    self.validate = encode_sentences(self.tokenizer, self.validate['source'], self.validate['target'])
    self.test = encode_sentences(self.tokenizer, self.test['source'], self.test['target'])

  # Load the training, validation and test sets in Pytorch Dataset objects
  def train_dataloader(self):
    dataset = TensorDataset(self.train['input_ids'], self.train['attention_mask'], self.train['labels'])                          
    train_data = DataLoader(dataset, sampler = RandomSampler(dataset), batch_size = self.batch_size)
    return train_data

  def val_dataloader(self):
    dataset = TensorDataset(self.validate['input_ids'], self.validate['attention_mask'], self.validate['labels']) 
    val_data = DataLoader(dataset, batch_size = self.batch_size)                       
    return val_data

  def test_dataloader(self):
    dataset = TensorDataset(self.test['input_ids'], self.test['attention_mask'], self.test['labels']) 
    test_data = DataLoader(dataset, batch_size = self.batch_size)                   
    return test_data


In [10]:
!pip install rouge_score
from datasets import list_metrics
metrics_list = list_metrics()
len(metrics_list)
print (metrics_list)
rouge_metric = load_metric('rouge')

['accuracy', 'bertscore', 'bleu', 'bleurt', 'cer', 'chrf', 'code_eval', 'comet', 'competition_math', 'coval', 'cuad', 'exact_match', 'f1', 'frugalscore', 'glue', 'google_bleu', 'indic_glue', 'mae', 'mahalanobis', 'matthews_correlation', 'mauve', 'mean_iou', 'meteor', 'mse', 'pearsonr', 'perplexity', 'precision', 'recall', 'rouge', 'sacrebleu', 'sari', 'seqeval', 'spearmanr', 'squad', 'squad_v2', 'super_glue', 'ter', 'wer', 'wiki_split', 'xnli', 'xtreme_s']


In [11]:
# Install stanza; note that the prefix "!" is not needed if you are running in a terminal
!pip install stanza

# Import stanza
import stanza



In [12]:
# Download the Stanford CoreNLP package with Stanza's installation command
# This'll take several minutes, depending on the network speed
corenlp_dir = './corenlp'
stanza.install_corenlp(dir=corenlp_dir)

# Set the CORENLP_HOME environment variable to point to the installation location
import os
os.environ["CORENLP_HOME"] = corenlp_dir



In [13]:
# Examine the CoreNLP installation folder to make sure the installation is successful
!export CORENLP_HOME='./corenlp'
!ls $CORENLP_HOME

build.xml				  jollyday.jar
corenlp.sh				  LIBRARY-LICENSES
CoreNLP-to-HTML.xsl			  LICENSE.txt
ejml-core-0.39.jar			  Makefile
ejml-core-0.39-sources.jar		  patterns
ejml-ddense-0.39.jar			  pom-java-11.xml
ejml-ddense-0.39-sources.jar		  pom-java-17.xml
ejml-simple-0.39.jar			  pom.xml
ejml-simple-0.39-sources.jar		  protobuf-java-3.19.2.jar
input.txt				  README.txt
input.txt.out				  RESOURCE-LICENSES
input.txt.xml				  SemgrexDemo.java
istack-commons-runtime-3.0.7.jar	  ShiftReduceDemo.java
istack-commons-runtime-3.0.7-sources.jar  slf4j-api.jar
javax.activation-api-1.2.0.jar		  slf4j-simple.jar
javax.activation-api-1.2.0-sources.jar	  stanford-corenlp-4.4.0.jar
javax.json-api-1.0-sources.jar		  stanford-corenlp-4.4.0-javadoc.jar
javax.json.jar				  stanford-corenlp-4.4.0-models.jar
jaxb-api-2.4.0-b180830.0359.jar		  stanford-corenlp-4.4.0-sources.jar
jaxb-api-2.4.0-b180830.0359-sources.jar   StanfordCoreNlpDemo.java
jaxb-impl-2.4.0-b180830.0438.jar	  StanfordDependenciesManual.p

In [14]:
# Import client module
from stanza.server import CoreNLPClient

In [15]:
texts = ["Albert Einstein was a German-born theoretical physicist.", "He was going to the school!"]
def coreNLP_tokenizer(inputDocsList):
  tokenizedDocsList = []
  with CoreNLPClient(annotators="tokenize ssplit pos lemma ner depparse".split(), memory='4G', endpoint='http://localhost:9001', be_quiet=True) as client:
    for d in inputDocsList:
      ann = client.annotate(d)

      # You can access annotations using ann.
      sentence = ann.sentence[0]

      # You can access any property within a sentence.
      #print(sentence.text)

      # Likewise for tokens
      #token = sentence.token[0]
      #print (token)
      tokenizedDocsList.append(' '.join([token.word.lower() for token in sentence.token]))
  return tokenizedDocsList

print(coreNLP_tokenizer(texts))



2022-04-22 12:40:21 INFO: Writing properties to tmp file: corenlp_server-d4f3c9c6edb24c19.props
2022-04-22 12:40:21 INFO: Starting server with command: java -Xmx4G -cp ./corenlp/* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 -timeout 60000 -threads 5 -maxCharLength 100000 -quiet True -serverProperties corenlp_server-d4f3c9c6edb24c19.props -annotators tokenize,ssplit,pos,lemma,ner,depparse -preload -outputFormat serialized


['albert einstein was a german - born theoretical physicist .', 'he was going to the school !']


In [16]:
import pandas as pd

df = pd.read_csv(root_dir + "setup6_testing.csv")
df.dropna()
df['source'] = df['source'].astype(str)
df['target'] = df['target'].astype(str)
df = df.iloc[:1000,:]
score = generate_summaries(df, rouge_metric) #tokenizer, bart_model,

100%|██████████| 1000/1000 [2:01:55<00:00,  7.32s/it]
2022-04-22 14:42:47 INFO: Writing properties to tmp file: corenlp_server-b86381656e494f57.props
2022-04-22 14:42:47 INFO: Starting server with command: java -Xmx4G -cp ./corenlp/* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 -timeout 60000 -threads 5 -maxCharLength 100000 -quiet True -serverProperties corenlp_server-b86381656e494f57.props -annotators tokenize,ssplit,pos,lemma,ner,depparse -preload -outputFormat serialized
2022-04-22 14:58:24 INFO: Writing properties to tmp file: corenlp_server-8989d3b16c5b45b3.props
2022-04-22 14:58:24 INFO: Starting server with command: java -Xmx4G -cp ./corenlp/* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 -timeout 60000 -threads 5 -maxCharLength 100000 -quiet True -serverProperties corenlp_server-8989d3b16c5b45b3.props -annotators tokenize,ssplit,pos,lemma,ner,depparse -preload -outputFormat serialized


In [17]:
print(score)
#setup 1
#new setup 2: setup 7
#new setup 3: setup 8
#setup 4
#setup 6
# .....////////

{'rouge1': AggregateScore(low=Score(precision=0.29498286984234423, recall=0.24594680137346567, fmeasure=0.25055893997028816), mid=Score(precision=0.31275186851648584, recall=0.26293685443577314, fmeasure=0.2660769212618689), high=Score(precision=0.331955666345758, recall=0.28088618377676733, fmeasure=0.28405881575102215)), 'rouge2': AggregateScore(low=Score(precision=0.1723794698088145, recall=0.14832332851268157, fmeasure=0.15080917295286478), mid=Score(precision=0.1895064947123098, recall=0.16458308703944718, fmeasure=0.1672126405104167), high=Score(precision=0.2075429973150358, recall=0.18194609155544475, fmeasure=0.18446283528381138)), 'rougeL': AggregateScore(low=Score(precision=0.27436873533189365, recall=0.2291754916423595, fmeasure=0.23335088861121708), mid=Score(precision=0.2938021546253572, recall=0.24717443540150744, fmeasure=0.25070846194943736), high=Score(precision=0.31337804970998806, recall=0.26489161173100106, fmeasure=0.26825894205390854)), 'rougeLsum': AggregateScore

Setup 1 (239): {'rouge1': AggregateScore(low=Score(precision=0.28722830566498725, recall=0.08814194717994406, fmeasure=0.12837556161709315), mid=Score(precision=0.3133012947245244, recall=0.0975060863495234, fmeasure=0.14160916564686518), high=Score(precision=0.33987915536218366, recall=0.10763156473676706, fmeasure=0.1553124167614574)), 'rouge2': AggregateScore(low=Score(precision=0.143355986791402, recall=0.04395986131007037, fmeasure=0.06480345762513029), mid=Score(precision=0.16552784131486176, recall=0.05232851850861063, fmeasure=0.07641645049074844), high=Score(precision=0.19085530292204653, recall=0.06174358453468614, fmeasure=0.0896206101419907)), 'rougeL': AggregateScore(low=Score(precision=0.24326704165619062, recall=0.07313072536293883, fmeasure=0.10697455601530169), mid=Score(precision=0.26750353265687643, recall=0.08238807154752045, fmeasure=0.11999374701287896), high=Score(precision=0.29292892698596174, recall=0.09194877979152317, fmeasure=0.13326891629012305)), 'rougeLsum': AggregateScore(low=Score(precision=0.24710850110828064, recall=0.07418934963626499, fmeasure=0.1084206988261524), mid=Score(precision=0.26937864933270816, recall=0.08264275232714688, fmeasure=0.1204436432139826), high=Score(precision=0.29501287688343175, recall=0.09252854755347768, fmeasure=0.1339309343608814))}

Setup 2 Batch 1 of 500: 
{'rouge1': AggregateScore(low=Score(precision=0.5891978946719967, recall=0.3354197913016133, fmeasure=0.40090480119089367), mid=Score(precision=0.6009934365905751, recall=0.35415364776384584, fmeasure=0.41630349093185104), high=Score(precision=0.6124562544616946, recall=0.37300018903859716, fmeasure=0.4308463648380982)), 'rouge2': AggregateScore(low=Score(precision=0.41689975171643484, recall=0.23939178988282717, fmeasure=0.283373061002194), mid=Score(precision=0.42960126881939165, recall=0.2563911139259175, fmeasure=0.29764958159978233), high=Score(precision=0.4420258779439407, recall=0.2737717262031954, fmeasure=0.3119211379839062)), 'rougeL': AggregateScore(low=Score(precision=0.5453401550714627, recall=0.312319457414838, fmeasure=0.37281241902693896), mid=Score(precision=0.5573141658043004, recall=0.33228562536385753, fmeasure=0.38796135736724846), high=Score(precision=0.569727480817454, recall=0.35151039819778523, fmeasure=0.40368411129545795)), 'rougeLsum': AggregateScore(low=Score(precision=0.545275292347259, recall=0.3135170673731939, fmeasure=0.37346559759172027), mid=Score(precision=0.5577365855303434, recall=0.33276155850851064, fmeasure=0.38858226033362336), high=Score(precision=0.5718387357865006, recall=0.35170071744416936, fmeasure=0.40460825230434805))}

changed to min length 120, max length 512

Setup 7 Batch 1 of 1000: {'rouge1': AggregateScore(low=Score(precision=0.2892991096171933, recall=0.08644604216094207, fmeasure=0.1252305288489533), mid=Score(precision=0.30716120071122044, recall=0.09294557135400588, fmeasure=0.13430566265393828), high=Score(precision=0.32408386823626883, recall=0.09954806661596668, fmeasure=0.14327408341374592)), 'rouge2': AggregateScore(low=Score(precision=0.1470486921093726, recall=0.04393581321705372, fmeasure=0.06488210159970717), mid=Score(precision=0.16249654561312926, recall=0.04968434151453449, fmeasure=0.07258087817173492), high=Score(precision=0.17764026507382372, recall=0.05550526317191996, fmeasure=0.08083292095933844)), 'rougeL': AggregateScore(low=Score(precision=0.2503010012517298, recall=0.0742641313456086, fmeasure=0.10826602478095192), mid=Score(precision=0.2658479100259661, recall=0.07953573902181765, fmeasure=0.1156947121259424), high=Score(precision=0.2831239808840482, recall=0.08553591516949739, fmeasure=0.12391115648745366)), 'rougeLsum': AggregateScore(low=Score(precision=0.24782089188736053, recall=0.07331775292323313, fmeasure=0.10673090061966682), mid=Score(precision=0.26603300874552877, recall=0.07946388735538865, fmeasure=0.11556123450801215), high=Score(precision=0.28199079682892303, recall=0.08583262109104642, fmeasure=0.12431240248300368))}


Setup 8 Batch size 1000: {'rouge1': AggregateScore(low=Score(precision=0.2910981910689075, recall=0.08680338795937001, fmeasure=0.1259990685817724), mid=Score(precision=0.30740869151357175, recall=0.09321688007167153, fmeasure=0.13444120665750914), high=Score(precision=0.3249541136424948, recall=0.09963038110131235, fmeasure=0.14348762548724142)), 'rouge2': AggregateScore(low=Score(precision=0.14582735312326645, recall=0.043939513849456664, fmeasure=0.06442764112957909), mid=Score(precision=0.1616070136289472, recall=0.049475984658177374, fmeasure=0.07238316397107578), high=Score(precision=0.177826049170152, recall=0.05482994111477593, fmeasure=0.07996659885116728)), 'rougeL': AggregateScore(low=Score(precision=0.24880728828841736, recall=0.07355719422364107, fmeasure=0.10745092724493206), mid=Score(precision=0.26584459824779727, recall=0.07927852674268741, fmeasure=0.11535016444022876), high=Score(precision=0.2827610907523112, recall=0.08607669993365831, fmeasure=0.12471615590940531)), 'rougeLsum': AggregateScore(low=Score(precision=0.25046592496099157, recall=0.07384434991455695, fmeasure=0.10763773352882947), mid=Score(precision=0.26622323884756915, recall=0.07954335359878612, fmeasure=0.11569212406585916), high=Score(precision=0.28284612589620134, recall=0.08600835525000182, fmeasure=0.12462960356041794))}


changed number of beams to 10

setup 1 (First 1000): {'rouge1': AggregateScore(low=Score(precision=0.31164297694557974, recall=0.09267538027213244, fmeasure=0.1358250597858995), mid=Score(precision=0.3285656753521243, recall=0.09896565432724438, fmeasure=0.14430652583367076), high=Score(precision=0.34518954811104396, recall=0.10510754755913575, fmeasure=0.15299793636989087)), 'rouge2': AggregateScore(low=Score(precision=0.1556052723944178, recall=0.045996004222377124, fmeasure=0.06777973507447012), mid=Score(precision=0.17223082665912492, recall=0.05163629018574175, fmeasure=0.07606841988502183), high=Score(precision=0.18936254457797896, recall=0.05757022793910876, fmeasure=0.08453476398343505)), 'rougeL': AggregateScore(low=Score(precision=0.26800078394024524, recall=0.07904220235307359, fmeasure=0.11617811671769997), mid=Score(precision=0.2845398434600665, recall=0.08422509266691164, fmeasure=0.12351705345490913), high=Score(precision=0.30217227494693044, recall=0.09097734445247439, fmeasure=0.13345600758138618)), 'rougeLsum': AggregateScore(low=Score(precision=0.26647405264140067, recall=0.07851870818696627, fmeasure=0.1154867384227982), mid=Score(precision=0.2838739943255195, recall=0.0840417148268883, fmeasure=0.12332842910930836), high=Score(precision=0.30101157420664576, recall=0.09020289201900096, fmeasure=0.1319553865826242))}


setup 7 (first 1000): {'rouge1': AggregateScore(low=Score(precision=0.2978103305268655, recall=0.0930477353053869, fmeasure=0.132743554700207), mid=Score(precision=0.31493752336155656, recall=0.09990716010044705, fmeasure=0.14189823315990205), high=Score(precision=0.33286848594835416, recall=0.10643613512087954, fmeasure=0.1512808865749243)), 'rouge2': AggregateScore(low=Score(precision=0.15093620219901374, recall=0.04701617624701626, fmeasure=0.06823585128352652), mid=Score(precision=0.16787937130734046, recall=0.05279357734038581, fmeasure=0.07669479390399353), high=Score(precision=0.18435063270349938, recall=0.058644718968500854, fmeasure=0.08483092075137226)), 'rougeL': AggregateScore(low=Score(precision=0.2546506667861504, recall=0.0776538201427948, fmeasure=0.11226530607243429), mid=Score(precision=0.2717565849433424, recall=0.08459088530026385, fmeasure=0.1215639172336983), high=Score(precision=0.28935171791638103, recall=0.09073514864755002, fmeasure=0.13005726755078698)), 'rougeLsum': AggregateScore(low=Score(precision=0.2542836471753781, recall=0.07855523031957679, fmeasure=0.11312899758137712), mid=Score(precision=0.2714374818531296, recall=0.08464642570994346, fmeasure=0.12159944377438751), high=Score(precision=0.28918194498105687, recall=0.09076174100045295, fmeasure=0.13046431312938236))}

setup 8 (first 1000): {'rouge1': AggregateScore(low=Score(precision=0.2992667492719756, recall=0.09311588645371638, fmeasure=0.1329644061921033), mid=Score(precision=0.3146944646613265, recall=0.09970086150006957, fmeasure=0.14191563154391446), high=Score(precision=0.3329443623099787, recall=0.10606509811033547, fmeasure=0.1510939575888892)), 'rouge2': AggregateScore(low=Score(precision=0.151421784295168, recall=0.04669052375650681, fmeasure=0.06778406666942428), mid=Score(precision=0.16772081033146818, recall=0.052660887456085825, fmeasure=0.07630963908322153), high=Score(precision=0.18402846913576923, recall=0.05886047350505215, fmeasure=0.08507138130366308)), 'rougeL': AggregateScore(low=Score(precision=0.25462634627559616, recall=0.07849551700554099, fmeasure=0.11302570050337428), mid=Score(precision=0.27185012410200327, recall=0.08451423144844576, fmeasure=0.12152760647283341), high=Score(precision=0.28908383445337815, recall=0.09107719686114983, fmeasure=0.1306932774077536)), 'rougeLsum': AggregateScore(low=Score(precision=0.25352502545050204, recall=0.07823436839953732, fmeasure=0.1123046882780546), mid=Score(precision=0.27102125826535417, recall=0.08448553585188429, fmeasure=0.12141955530771864), high=Score(precision=0.287365128432533, recall=0.09118622044320206, fmeasure=0.13082454272699193))}


setup 4 (first 1000): {'rouge1': AggregateScore(low=Score(precision=0.28705051019822797, recall=0.24438735163375963, fmeasure=0.24625556567426168), mid=Score(precision=0.3057638799595561, recall=0.2623645391515626, fmeasure=0.26375303798649996), high=Score(precision=0.3240581152711498, recall=0.2791666700194609, fmeasure=0.2797086097527861)), 'rouge2': AggregateScore(low=Score(precision=0.16958222533107736, recall=0.1473874373206103, fmeasure=0.14940177094506957), mid=Score(precision=0.1869026458073797, recall=0.16394394152662262, fmeasure=0.16594013429457155), high=Score(precision=0.20500801521755857, recall=0.1816551210072497, fmeasure=0.18337668940527677)), 'rougeL': AggregateScore(low=Score(precision=0.26704264328613553, recall=0.22823788091421868, fmeasure=0.23069417242224388), mid=Score(precision=0.28579853257741433, recall=0.24472274803568855, fmeasure=0.24637442505285376), high=Score(precision=0.3044303474827074, recall=0.26195316271558927, fmeasure=0.26370985815231573)), 'rougeLsum': AggregateScore(low=Score(precision=0.266687959222359, recall=0.22623908194586356, fmeasure=0.2288952677739471), mid=Score(precision=0.28568820016293106, recall=0.24440082859198842, fmeasure=0.24615160921950413), high=Score(precision=0.30606144044283057, recall=0.26366958601427914, fmeasure=0.2650950831842788))}


setup 6 (first 1000): {'rouge1': AggregateScore(low=Score(precision=0.29498286984234423, recall=0.24594680137346567, fmeasure=0.25055893997028816), mid=Score(precision=0.31275186851648584, recall=0.26293685443577314, fmeasure=0.2660769212618689), high=Score(precision=0.331955666345758, recall=0.28088618377676733, fmeasure=0.28405881575102215)), 'rouge2': AggregateScore(low=Score(precision=0.1723794698088145, recall=0.14832332851268157, fmeasure=0.15080917295286478), mid=Score(precision=0.1895064947123098, recall=0.16458308703944718, fmeasure=0.1672126405104167), high=Score(precision=0.2075429973150358, recall=0.18194609155544475, fmeasure=0.18446283528381138)), 'rougeL': AggregateScore(low=Score(precision=0.27436873533189365, recall=0.2291754916423595, fmeasure=0.23335088861121708), mid=Score(precision=0.2938021546253572, recall=0.24717443540150744, fmeasure=0.25070846194943736), high=Score(precision=0.31337804970998806, recall=0.26489161173100106, fmeasure=0.26825894205390854)), 'rougeLsum': AggregateScore(low=Score(precision=0.27455820884597965, recall=0.22950027823679758, fmeasure=0.2332867937631098), mid=Score(precision=0.29307566914843597, recall=0.24598087359081922, fmeasure=0.25008556173655594), high=Score(precision=0.311741159218824, recall=0.2639089855760392, fmeasure=0.2675638614294953))}
