In [1]:
#Create drive folder
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


# Get and process the dataset QMSum
Code from : https://github.com/Yale-LILY/QMSum/blob/main/data_process.ipynb

In [None]:
import json

# read the dataset
# please enter the path of your data
split = 'val'
data_path = 'drive/My Drive/Colab Notebooks/finalproject/' + split + '.jsonl'
data = []
with open(data_path) as f:
    for line in f:
        data.append(json.loads(line))
n_meetings = len(data)
print('Total {} meetings in the {} set.'.format(n_meetings, split))

Total 35 meetings in the val set.


In [None]:
data[0]

In [None]:
from nltk import word_tokenize
# tokneize a sent
def tokenize(sent):
    tokens = ' '.join(word_tokenize(sent.lower()))
    return tokens

In [None]:
# filter some noises caused by speech recognition
def clean_data(text):
    text = text.replace('{ vocalsound } ', '')
    text = text.replace('{ disfmarker } ', '')
    text = text.replace('a_m_i_', 'ami')
    text = text.replace('l_c_d_', 'lcd')
    text = text.replace('p_m_s', 'pms')
    text = text.replace('t_v_', 'tv')
    text = text.replace('{ pause } ', '')
    text = text.replace('{ nonvocalsound } ', '')
    text = text.replace('{ gap } ', '')
    return text

In [None]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# process data for BART
# the input of the model here is the gold span corresponding to each query
bart_data_gold = []
for i in range(len(data)):
    # get meeting content
    entire_src = []
    for k in range(len(data[i]['meeting_transcripts'])):
      cur_turn = data[i]['meeting_transcripts'][k]['speaker'].lower() + ': '
      cur_turn_content = tokenize(data[i]['meeting_transcripts'][k]['content'])
      if len(cur_turn_content.split()) > 5:
          cur_turn += cur_turn_content
          entire_src.append(cur_turn)
    entire_src = ' '.join(entire_src)
    for j in range(len(data[i]['general_query_list'])):
        cur = {}
        query = tokenize(data[i]['general_query_list'][j]['query'])
        cur['src'] = clean_data('<s> ' + query + ' </s> ' + entire_src + ' </s>')
        target = tokenize(data[i]['general_query_list'][j]['answer'])
        cur['tgt'] = target
        bart_data_gold.append(cur)
    for j in range(len(data[i]['specific_query_list'])):
        cur = {}
        query = tokenize(data[i]['specific_query_list'][j]['query'])
        src = []
        # get the content in the gold span for each query
        for span in data[i]['specific_query_list'][j]['relevant_text_span']:
          assert len(span) == 2
          st, ed = int(span[0]), int(span[1])
          span_content = []
          for k in range(st, ed + 1):
              cur_turn = data[i]['meeting_transcripts'][k]['speaker'].lower() + ': '
              cur_turn_content = tokenize(data[i]['meeting_transcripts'][k]['content'])
              if len(cur_turn_content.split()) > 5:
                  cur_turn += cur_turn_content
                  span_content.append(cur_turn)
          if span_content:
              src.append(' '.join(span_content))
        src = ' '.join(src)
        cur['src'] = clean_data('<s> ' + query + ' </s> ' + src + ' </s>')
        target = tokenize(data[i]['specific_query_list'][j]['answer'])
        cur['tgt'] = target
        bart_data_gold.append(cur)
        
print('Total {} query-summary pairs in the {} set'.format(len(bart_data_gold), split))
print(bart_data_gold[2])
with open('drive/My Drive/Colab Notebooks/finalproject/processed_span_' + split + '._gold.jsonl', 'w') as f:
    for i in range(len(bart_data_gold)):
        print(json.dumps(bart_data_gold[i]), file=f)

In [None]:
bart_data_gold[340]

In [18]:
#process the data from our locator
import json

# Path to the JSON file
file_path = 'drive/My Drive/Colab Notebooks/finalproject/final_data_ELECTRA.json'

# Load the JSON file
with open(file_path, "r") as f:
    data = json.load(f)

# Convert to a list of dictionaries
processed_data = []
for meeting, content in data.items():
    for key, value in content.items():
        if key.startswith("span"):
            index = key.replace("span", "")
            answer_key = "answer" + index
            item = {"src": value, "tgt": content.get(answer_key)}
            processed_data.append(item)

In [None]:
data['meeting: IS1003d.json']['span1']

In [None]:
data['meeting: IS1003d.json']['answer1']

In [19]:
len(processed_data)

244

# Get datasets from Drive

In [None]:
# function to get the preprocessed jsonl files
import json

def read_jsonl(file_path):
    data = []
    with open(file_path, 'r') as f:
        for line in f:
            data.append(json.loads(line.strip()))
    return data

In [None]:
#get preprocessed datasets
bart_data_gold_train = read_jsonl('drive/My Drive/Colab Notebooks/finalproject/processed_span_train._gold.jsonl')
bart_data_gold_val = read_jsonl('drive/My Drive/Colab Notebooks/finalproject/processed_span_val._gold.jsonl')

print("Train set len : ", len(bart_data_gold_train))
print("Val set len : ", len(bart_data_gold_val))


Train set len :  1257
Val set len :  272


# Fine-tuning

In [None]:
pip install transformers

In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration, AdamW
import torch

# Load the model and tokenizer
model_name = "knkarthick/MEETING_SUMMARY"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Prepare the training data (example)
transcriptions = [d["src"] for d in bart_data_gold_train]  # List of meeting transcriptions
summaries = [d["tgt"] for d in bart_data_gold_train]  # List of meeting summaries

# Encode the data
inputs = tokenizer.batch_encode_plus(
    transcriptions,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=1024
)

labels = tokenizer.batch_encode_plus(
    summaries,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=256
)

# Extract input and output tensors
# input_ids = inputs["input_ids"]
# attention_mask = inputs["attention_mask"]
# labels = labels["input_ids"]
input_ids = inputs.input_ids
attention_mask = inputs.attention_mask
labels = labels.input_ids

# Configure training
optimizer = AdamW(model.parameters(), lr=1e-5)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
input_ids = input_ids.to(device)
attention_mask = attention_mask.to(device)
labels = labels.to(device)

# Training
model.train()
for epoch in range(3):  # Number of epochs
    total_loss = 0
    for i in range(len(input_ids)):
        optimizer.zero_grad()
        outputs = model(input_ids[i].unsqueeze(0), attention_mask=attention_mask[i].unsqueeze(0), labels=labels[i].unsqueeze(0))
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        if (i + 1) % 10 == 0:
            print(f"Iteration {i+1}/{len(input_ids)} - Loss: {loss.item()}")
    
    avg_loss = total_loss / len(input_ids)
    print(f"Epoch {epoch+1}/{5} - Average Loss: {avg_loss}")
    


In [None]:
# Save the fine-tuned model
model.save_pretrained("drive/My Drive/Colab Notebooks/finalproject/trained_model/fine_tuned_meeting_bart")
tokenizer.save_pretrained("drive/My Drive/Colab Notebooks/finalproject/trained_model/fine_tuned_meeting_bart")

# Evaluation
In this part, we evaluate the validation set with different models

In [4]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
pip install rouge

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [6]:
from transformers import BartTokenizer

# Function to truncate the text to a length  max 1024 tokens
def truncate_text(text, tokenizer, max_length=1024):
    if len(tokenizer.tokenize(text)) < (max_length-3):
        #trace
        print("no truncate : " + str(len(tokenizer.tokenize(text))))

        return text
    else:
        #trace
        before_after = " " + str(len(tokenizer.tokenize(text)))

        #get the query before encoding
        # Find the index of the first and last special tokens
        start_token_idx = text.find("<s>")
        end_token_idx = text.find("</s>")

        # Extract the query between the tags
        query = text[start_token_idx+len("<s>"):end_token_idx].strip()

        
        # Remove the query part and the tags from the text
        truncated_text = text[end_token_idx+len("</s>"):].strip()

        #encode decode
        encoded = tokenizer.encode(truncated_text, truncation=True, max_length=max_length)
        truncated_text = tokenizer.decode(encoded, skip_special_tokens=True)

        #be sure the length won't be over max_length
        additional_length = len(tokenizer.tokenize("<s> " + query + " </s> " + " </s>"))
        
        if len(tokenizer.tokenize(truncated_text)) > (max_length - additional_length-3):
          truncated_tokens = tokenizer.tokenize(truncated_text)[:(max_length - additional_length-5)]
          truncated_text = tokenizer.convert_tokens_to_string(truncated_tokens)
          
        # Concatenate the query with the truncated text and add the </s> tag at the end
        truncated_text = "<s> " + query + " </s> " + truncated_text + " </s>"

        #trace
        before_after = before_after + " -> " + str(len(tokenizer.tokenize(truncated_text)))
        print(before_after)
        
        return truncated_text

In [7]:
from transformers import pipeline
from transformers import BartForConditionalGeneration, BartTokenizer
from rouge import Rouge


#function to create the model
def evaluate(MODEL_NAME, val_set):

  #get model
  model = BartForConditionalGeneration.from_pretrained(MODEL_NAME)
  tokenizer = BartTokenizer.from_pretrained(MODEL_NAME)

  #param model
  params = {
    "model": model,
    "tokenizer": tokenizer,
    "model_name_or_path": None,
    "task": "summarization",
    "framework": "pt",
    "device": 0,
    "max_length": 350,
    "min_length": 30,
    "length_penalty": 2.0,
    "num_beams": 4,
    "repetition_penalty": 1.0,
    "no_repeat_ngram_size": 3,
    "early_stopping": True,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": True,
    "temperature": 1.0,
  }

  #pipeline
  custom_pipeline = pipeline(**params)

  #get only relevant span for predicting
  inputs = [d["src"] for d in val_set]  # List of meeting transcriptions
  ref = [d["tgt"] for d in val_set] #list of reference summarizes

  #summarizes
  sums = []

  #get every sum
  for i, text in enumerate(inputs):

    #progression
    print("[---- "+str(i)+"----]")
    truncated_text = truncate_text(text,tokenizer, max_length=1024)
    print("len tokens : " + str(len(tokenizer.tokenize(truncated_text))) + " ("+str(len(truncated_text.split()))+")")

    summary = custom_pipeline(truncated_text)
    sums.append(summary[0]['summary_text'])


  return sums,ref

In [8]:
from statistics import mean
def get_rouge_l(sums, refs):

  #Get ROUGE Scores
  rouge_l_scores = []
  rouge_1_scores = []
  rouge_2_scores = []

  for i in range(len(sums)):
    hypothesis = sums[i]
    reference = refs[i]
    rouge = Rouge()
    score = rouge.get_scores([hypothesis], [reference])

    #rouge 1
    rouge_1_score = score[0]["rouge-1"]["f"]
    rouge_1_scores.append(rouge_1_score)
    #rouge 2
    rouge_2_score = score[0]["rouge-2"]["f"]
    rouge_2_scores.append(rouge_2_score)
    #rouge l
    rouge_l_score = score[0]["rouge-l"]["f"]
    rouge_l_scores.append(rouge_l_score)
  
  return mean(rouge_1_scores), mean(rouge_2_scores), mean(rouge_l_scores)

In [27]:
#Models
BART = "facebook/bart-large-cnn"
MEETING_SUM = "knkarthick/MEETING_SUMMARY"

MY_BART_CNN = "drive/My Drive/Colab Notebooks/finalproject/trained_model/fine_tuned_bart_cnn"
MY_MEETING_SUM = "drive/My Drive/Colab Notebooks/finalproject/trained_model/fine_tuned_meeting_bart"

MODEL = MY_MEETING_SUM

#get rouge-l score
sums, refs = evaluate(MODEL,processed_data)

Your max_length is set to 350, but your input_length is only 196. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=98)


[---- 0----]
no truncate : 194
len tokens : 194 (134)


Your max_length is set to 350, but your input_length is only 115. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=57)


[---- 1----]
no truncate : 113
len tokens : 113 (76)


Your max_length is set to 350, but your input_length is only 153. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=76)


[---- 2----]
no truncate : 151
len tokens : 151 (104)


Your max_length is set to 350, but your input_length is only 212. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=106)


[---- 3----]
no truncate : 210
len tokens : 210 (144)


Your max_length is set to 350, but your input_length is only 193. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=96)


[---- 4----]
no truncate : 191
len tokens : 191 (135)


Your max_length is set to 350, but your input_length is only 161. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=80)


[---- 5----]
no truncate : 159
len tokens : 159 (110)


Your max_length is set to 350, but your input_length is only 193. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=96)


[---- 6----]
no truncate : 191
len tokens : 191 (133)


Your max_length is set to 350, but your input_length is only 127. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=63)


[---- 7----]
no truncate : 125
len tokens : 125 (85)


Your max_length is set to 350, but your input_length is only 159. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=79)


[---- 8----]
no truncate : 157
len tokens : 157 (106)
[---- 9----]
no truncate : 724
len tokens : 724 (501)
[---- 10----]
no truncate : 446
len tokens : 446 (305)
[---- 11----]
no truncate : 722
len tokens : 722 (502)
[---- 12----]
no truncate : 446
len tokens : 446 (302)
[---- 13----]
no truncate : 437
len tokens : 437 (300)
[---- 14----]
no truncate : 354
len tokens : 354 (237)


Your max_length is set to 350, but your input_length is only 146. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=73)


[---- 15----]
no truncate : 144
len tokens : 144 (108)


Your max_length is set to 350, but your input_length is only 136. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=68)


[---- 16----]
no truncate : 134
len tokens : 134 (104)


Your max_length is set to 350, but your input_length is only 154. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=77)


[---- 17----]
no truncate : 152
len tokens : 152 (118)


Your max_length is set to 350, but your input_length is only 113. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=56)


[---- 18----]
no truncate : 111
len tokens : 111 (90)


Your max_length is set to 350, but your input_length is only 128. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=64)


[---- 19----]
no truncate : 126
len tokens : 126 (96)


Your max_length is set to 350, but your input_length is only 123. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=61)


[---- 20----]
no truncate : 121
len tokens : 121 (93)
[---- 21----]
no truncate : 440
len tokens : 440 (359)


Your max_length is set to 350, but your input_length is only 247. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=123)


[---- 22----]
no truncate : 245
len tokens : 245 (206)
[---- 23----]
no truncate : 399
len tokens : 399 (331)
[---- 24----]
no truncate : 413
len tokens : 413 (336)
[---- 25----]
no truncate : 365
len tokens : 365 (300)


Your max_length is set to 350, but your input_length is only 325. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=162)


[---- 26----]
no truncate : 323
len tokens : 323 (266)


Your max_length is set to 350, but your input_length is only 326. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=163)


[---- 27----]
no truncate : 324
len tokens : 324 (267)
[---- 28----]
no truncate : 489
len tokens : 489 (394)


Your max_length is set to 350, but your input_length is only 292. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=146)


[---- 29----]
no truncate : 290
len tokens : 290 (236)


Your max_length is set to 350, but your input_length is only 319. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=159)


[---- 30----]
no truncate : 317
len tokens : 317 (264)


Your max_length is set to 350, but your input_length is only 170. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=85)


[---- 31----]
no truncate : 168
len tokens : 168 (129)


Your max_length is set to 350, but your input_length is only 185. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=92)


[---- 32----]
no truncate : 183
len tokens : 183 (138)


Your max_length is set to 350, but your input_length is only 186. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=93)


[---- 33----]
no truncate : 184
len tokens : 184 (140)


Your max_length is set to 350, but your input_length is only 96. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=48)


[---- 34----]
no truncate : 94
len tokens : 94 (69)


Your max_length is set to 350, but your input_length is only 74. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=37)


[---- 35----]
no truncate : 72
len tokens : 72 (55)


Your max_length is set to 350, but your input_length is only 98. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=49)


[---- 36----]
no truncate : 96
len tokens : 96 (73)


Your max_length is set to 350, but your input_length is only 96. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=48)


[---- 37----]
no truncate : 94
len tokens : 94 (73)


Your max_length is set to 350, but your input_length is only 94. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=47)


[---- 38----]
no truncate : 92
len tokens : 92 (66)


Your max_length is set to 350, but your input_length is only 93. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=46)


[---- 39----]
no truncate : 91
len tokens : 91 (69)


Your max_length is set to 350, but your input_length is only 127. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=63)


[---- 40----]
no truncate : 125
len tokens : 125 (86)


Your max_length is set to 350, but your input_length is only 102. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=51)


[---- 41----]
no truncate : 100
len tokens : 100 (74)


Your max_length is set to 350, but your input_length is only 119. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=59)


[---- 42----]
no truncate : 117
len tokens : 117 (86)


Your max_length is set to 350, but your input_length is only 69. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=34)


[---- 43----]
no truncate : 67
len tokens : 67 (51)


Your max_length is set to 350, but your input_length is only 290. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=145)


[---- 44----]
no truncate : 288
len tokens : 288 (201)
[---- 45----]
no truncate : 354
len tokens : 354 (262)


Your max_length is set to 350, but your input_length is only 127. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=63)


[---- 46----]
no truncate : 125
len tokens : 125 (87)


Your max_length is set to 350, but your input_length is only 247. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=123)


[---- 47----]
no truncate : 245
len tokens : 245 (171)


Your max_length is set to 350, but your input_length is only 213. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=106)


[---- 48----]
no truncate : 211
len tokens : 211 (144)


Your max_length is set to 350, but your input_length is only 212. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=106)


[---- 49----]
no truncate : 210
len tokens : 210 (143)


Your max_length is set to 350, but your input_length is only 65. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=32)


[---- 50----]
no truncate : 63
len tokens : 63 (59)


Your max_length is set to 350, but your input_length is only 14. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=7)


[---- 51----]
no truncate : 12
len tokens : 12 (12)
[---- 52----]
no truncate : 371
len tokens : 371 (311)


Your max_length is set to 350, but your input_length is only 298. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=149)


[---- 53----]
no truncate : 296
len tokens : 296 (243)


Your max_length is set to 350, but your input_length is only 84. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=42)


[---- 54----]
no truncate : 82
len tokens : 82 (74)
[---- 55----]
no truncate : 356
len tokens : 356 (300)


Your max_length is set to 350, but your input_length is only 105. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=52)


[---- 56----]
no truncate : 103
len tokens : 103 (70)


Your max_length is set to 350, but your input_length is only 79. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=39)


[---- 57----]
no truncate : 77
len tokens : 77 (57)


Your max_length is set to 350, but your input_length is only 79. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=39)


[---- 58----]
no truncate : 77
len tokens : 77 (57)


Your max_length is set to 350, but your input_length is only 104. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=52)


[---- 59----]
no truncate : 102
len tokens : 102 (69)


Your max_length is set to 350, but your input_length is only 97. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=48)


[---- 60----]
no truncate : 95
len tokens : 95 (68)


Your max_length is set to 350, but your input_length is only 81. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)


[---- 61----]
no truncate : 79
len tokens : 79 (59)


Your max_length is set to 350, but your input_length is only 127. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=63)


[---- 62----]
no truncate : 125
len tokens : 125 (88)


Your max_length is set to 350, but your input_length is only 88. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=44)


[---- 63----]
no truncate : 86
len tokens : 86 (61)


Your max_length is set to 350, but your input_length is only 86. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=43)


[---- 64----]
no truncate : 84
len tokens : 84 (59)


Your max_length is set to 350, but your input_length is only 197. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=98)


[---- 65----]
no truncate : 195
len tokens : 195 (149)


Your max_length is set to 350, but your input_length is only 213. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=106)


[---- 66----]
no truncate : 211
len tokens : 211 (161)


Your max_length is set to 350, but your input_length is only 215. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=107)


[---- 67----]
no truncate : 213
len tokens : 213 (165)


Your max_length is set to 350, but your input_length is only 255. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=127)


[---- 68----]
no truncate : 253
len tokens : 253 (197)


Your max_length is set to 350, but your input_length is only 209. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=104)


[---- 69----]
no truncate : 207
len tokens : 207 (161)


Your max_length is set to 350, but your input_length is only 211. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=105)


[---- 70----]
no truncate : 209
len tokens : 209 (159)


Your max_length is set to 350, but your input_length is only 217. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=108)


[---- 71----]
no truncate : 215
len tokens : 215 (167)


Your max_length is set to 350, but your input_length is only 128. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=64)


[---- 72----]
no truncate : 126
len tokens : 126 (94)


Your max_length is set to 350, but your input_length is only 218. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=109)


[---- 73----]
no truncate : 216
len tokens : 216 (164)


Your max_length is set to 350, but your input_length is only 207. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=103)


[---- 74----]
no truncate : 205
len tokens : 205 (158)


Your max_length is set to 350, but your input_length is only 201. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=100)


[---- 75----]
no truncate : 199
len tokens : 199 (153)


Your max_length is set to 350, but your input_length is only 104. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=52)


[---- 76----]
no truncate : 102
len tokens : 102 (73)


Your max_length is set to 350, but your input_length is only 178. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=89)


[---- 77----]
no truncate : 176
len tokens : 176 (124)


Your max_length is set to 350, but your input_length is only 137. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=68)


[---- 78----]
no truncate : 135
len tokens : 135 (93)


Your max_length is set to 350, but your input_length is only 123. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=61)


[---- 79----]
no truncate : 121
len tokens : 121 (87)


Your max_length is set to 350, but your input_length is only 49. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=24)


[---- 80----]
no truncate : 47
len tokens : 47 (29)


Your max_length is set to 350, but your input_length is only 97. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=48)


[---- 81----]
no truncate : 95
len tokens : 95 (66)


Your max_length is set to 350, but your input_length is only 229. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=114)


[---- 82----]
no truncate : 227
len tokens : 227 (200)
[---- 83----]
no truncate : 441
len tokens : 441 (385)
[---- 84----]
no truncate : 434
len tokens : 434 (379)
[---- 85----]
no truncate : 437
len tokens : 437 (380)
[---- 86----]
no truncate : 459
len tokens : 459 (393)
[---- 87----]
no truncate : 490
len tokens : 490 (413)
[---- 88----]
no truncate : 485
len tokens : 485 (413)


Your max_length is set to 350, but your input_length is only 40. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=20)


[---- 89----]
no truncate : 38
len tokens : 38 (29)
[---- 90----]
no truncate : 442
len tokens : 442 (386)
[---- 91----]
no truncate : 529
len tokens : 529 (447)


Your max_length is set to 350, but your input_length is only 63. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=31)


[---- 92----]
no truncate : 61
len tokens : 61 (43)
[---- 93----]
no truncate : 435
len tokens : 435 (344)


Your max_length is set to 350, but your input_length is only 149. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=74)


[---- 94----]
no truncate : 147
len tokens : 147 (125)


Your max_length is set to 350, but your input_length is only 134. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=67)


[---- 95----]
no truncate : 132
len tokens : 132 (115)
[---- 96----]
no truncate : 558
len tokens : 558 (490)


Your max_length is set to 350, but your input_length is only 140. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=70)


[---- 97----]
no truncate : 138
len tokens : 138 (121)


Your max_length is set to 350, but your input_length is only 17. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=8)


[---- 98----]
no truncate : 15
len tokens : 15 (15)


Your max_length is set to 350, but your input_length is only 20. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


[---- 99----]
no truncate : 18
len tokens : 18 (18)


Your max_length is set to 350, but your input_length is only 35. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=17)


[---- 100----]
no truncate : 33
len tokens : 33 (27)
[---- 101----]
no truncate : 502
len tokens : 502 (451)


Your max_length is set to 350, but your input_length is only 316. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=158)


[---- 102----]
no truncate : 314
len tokens : 314 (274)


Your max_length is set to 350, but your input_length is only 247. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=123)


[---- 103----]
no truncate : 245
len tokens : 245 (213)


Your max_length is set to 350, but your input_length is only 243. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=121)


[---- 104----]
no truncate : 241
len tokens : 241 (209)


Your max_length is set to 350, but your input_length is only 320. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=160)


[---- 105----]
no truncate : 318
len tokens : 318 (276)


Your max_length is set to 350, but your input_length is only 337. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=168)


[---- 106----]
no truncate : 335
len tokens : 335 (256)


Your max_length is set to 350, but your input_length is only 327. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=163)


[---- 107----]
no truncate : 325
len tokens : 325 (254)


Your max_length is set to 350, but your input_length is only 345. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=172)


[---- 108----]
no truncate : 343
len tokens : 343 (262)
[---- 109----]
no truncate : 351
len tokens : 351 (266)


Your max_length is set to 350, but your input_length is only 342. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=171)


[---- 110----]
no truncate : 340
len tokens : 340 (261)


Your max_length is set to 350, but your input_length is only 319. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=159)


[---- 111----]
no truncate : 317
len tokens : 317 (245)
[---- 112----]
no truncate : 533
len tokens : 533 (478)


Your max_length is set to 350, but your input_length is only 28. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)


[---- 113----]
no truncate : 26
len tokens : 26 (20)
[---- 114----]
no truncate : 495
len tokens : 495 (439)
[---- 115----]
no truncate : 447
len tokens : 447 (401)


Your max_length is set to 350, but your input_length is only 219. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=109)


[---- 116----]
no truncate : 217
len tokens : 217 (199)
[---- 117----]
no truncate : 541
len tokens : 541 (477)
[---- 118----]
no truncate : 545
len tokens : 545 (492)


Your max_length is set to 350, but your input_length is only 257. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=128)


[---- 119----]
no truncate : 255
len tokens : 255 (227)
[---- 120----]
no truncate : 542
len tokens : 542 (478)
[---- 121----]
no truncate : 365
len tokens : 365 (326)


Your max_length is set to 350, but your input_length is only 15. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=7)


[---- 122----]
no truncate : 13
len tokens : 13 (13)
[---- 123----]
no truncate : 600
len tokens : 600 (536)


Your max_length is set to 350, but your input_length is only 87. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=43)


[---- 124----]
no truncate : 85
len tokens : 85 (70)


Your max_length is set to 350, but your input_length is only 110. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=55)


[---- 125----]
no truncate : 108
len tokens : 108 (88)


Your max_length is set to 350, but your input_length is only 115. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=57)


[---- 126----]
no truncate : 113
len tokens : 113 (90)


Your max_length is set to 350, but your input_length is only 151. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=75)


[---- 127----]
no truncate : 149
len tokens : 149 (113)


Your max_length is set to 350, but your input_length is only 135. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=67)


[---- 128----]
no truncate : 133
len tokens : 133 (101)


Your max_length is set to 350, but your input_length is only 141. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=70)


[---- 129----]
no truncate : 139
len tokens : 139 (109)


Your max_length is set to 350, but your input_length is only 78. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=39)


[---- 130----]
no truncate : 76
len tokens : 76 (51)


Your max_length is set to 350, but your input_length is only 92. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=46)


[---- 131----]
no truncate : 90
len tokens : 90 (70)


Your max_length is set to 350, but your input_length is only 71. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=35)


[---- 132----]
no truncate : 69
len tokens : 69 (48)


Your max_length is set to 350, but your input_length is only 90. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=45)


[---- 133----]
no truncate : 88
len tokens : 88 (64)


Your max_length is set to 350, but your input_length is only 70. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=35)


[---- 134----]
no truncate : 68
len tokens : 68 (54)


Your max_length is set to 350, but your input_length is only 37. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=18)


[---- 135----]
no truncate : 35
len tokens : 35 (28)


Your max_length is set to 350, but your input_length is only 66. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=33)


[---- 136----]
no truncate : 64
len tokens : 64 (44)


Your max_length is set to 350, but your input_length is only 45. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=22)


[---- 137----]
no truncate : 43
len tokens : 43 (33)


Your max_length is set to 350, but your input_length is only 80. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)


[---- 138----]
no truncate : 78
len tokens : 78 (53)
[---- 139----]
no truncate : 479
len tokens : 479 (393)
[---- 140----]
no truncate : 400
len tokens : 400 (330)
[---- 141----]
no truncate : 457
len tokens : 457 (380)
[---- 142----]
no truncate : 374
len tokens : 374 (313)
[---- 143----]
no truncate : 457
len tokens : 457 (382)


Your max_length is set to 350, but your input_length is only 300. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=150)


[---- 144----]
no truncate : 298
len tokens : 298 (250)


Your max_length is set to 350, but your input_length is only 193. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=96)


[---- 145----]
no truncate : 191
len tokens : 191 (140)


Your max_length is set to 350, but your input_length is only 69. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=34)


[---- 146----]
no truncate : 67
len tokens : 67 (50)


Your max_length is set to 350, but your input_length is only 85. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=42)


[---- 147----]
no truncate : 83
len tokens : 83 (56)


Your max_length is set to 350, but your input_length is only 203. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=101)


[---- 148----]
no truncate : 201
len tokens : 201 (149)


Your max_length is set to 350, but your input_length is only 190. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=95)


[---- 149----]
no truncate : 188
len tokens : 188 (140)


Your max_length is set to 350, but your input_length is only 75. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=37)


[---- 150----]
no truncate : 73
len tokens : 73 (53)


Your max_length is set to 350, but your input_length is only 225. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=112)


[---- 151----]
no truncate : 223
len tokens : 223 (169)


Your max_length is set to 350, but your input_length is only 158. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=79)


[---- 152----]
no truncate : 156
len tokens : 156 (119)


Your max_length is set to 350, but your input_length is only 118. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=59)


[---- 153----]
no truncate : 116
len tokens : 116 (88)


Your max_length is set to 350, but your input_length is only 225. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=112)


[---- 154----]
no truncate : 223
len tokens : 223 (169)


Your max_length is set to 350, but your input_length is only 254. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=127)


[---- 155----]
no truncate : 252
len tokens : 252 (194)


Your max_length is set to 350, but your input_length is only 209. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=104)


[---- 156----]
no truncate : 207
len tokens : 207 (161)
[---- 157----]
no truncate : 514
len tokens : 514 (462)
[---- 158----]
no truncate : 500
len tokens : 500 (452)
[---- 159----]
no truncate : 548
len tokens : 548 (483)
[---- 160----]
no truncate : 559
len tokens : 559 (487)


Your max_length is set to 350, but your input_length is only 223. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=111)


[---- 161----]
no truncate : 221
len tokens : 221 (189)
[---- 162----]
no truncate : 435
len tokens : 435 (402)


Your max_length is set to 350, but your input_length is only 14. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=7)


[---- 163----]
no truncate : 12
len tokens : 12 (12)
[---- 164----]
no truncate : 411
len tokens : 411 (384)
[---- 165----]
no truncate : 402
len tokens : 402 (380)


Your max_length is set to 350, but your input_length is only 65. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=32)


[---- 166----]
no truncate : 63
len tokens : 63 (53)


Your max_length is set to 350, but your input_length is only 184. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=92)


[---- 167----]
no truncate : 182
len tokens : 182 (164)
[---- 168----]
no truncate : 400
len tokens : 400 (379)


Your max_length is set to 350, but your input_length is only 179. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=89)


[---- 169----]
no truncate : 177
len tokens : 177 (159)


Your max_length is set to 350, but your input_length is only 18. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


[---- 170----]
no truncate : 16
len tokens : 16 (16)
[---- 171----]
no truncate : 538
len tokens : 538 (500)


Your max_length is set to 350, but your input_length is only 23. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)


[---- 172----]
no truncate : 21
len tokens : 21 (20)


Your max_length is set to 350, but your input_length is only 182. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=91)


[---- 173----]
no truncate : 180
len tokens : 180 (134)


Your max_length is set to 350, but your input_length is only 157. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=78)


[---- 174----]
no truncate : 155
len tokens : 155 (119)


Your max_length is set to 350, but your input_length is only 81. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)


[---- 175----]
no truncate : 79
len tokens : 79 (56)


Your max_length is set to 350, but your input_length is only 91. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=45)


[---- 176----]
no truncate : 89
len tokens : 89 (64)


Your max_length is set to 350, but your input_length is only 187. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=93)


[---- 177----]
no truncate : 185
len tokens : 185 (145)


Your max_length is set to 350, but your input_length is only 96. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=48)


[---- 178----]
no truncate : 94
len tokens : 94 (69)


Your max_length is set to 350, but your input_length is only 209. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=104)


[---- 179----]
no truncate : 207
len tokens : 207 (158)


Your max_length is set to 350, but your input_length is only 196. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=98)


[---- 180----]
no truncate : 194
len tokens : 194 (151)


Your max_length is set to 350, but your input_length is only 200. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=100)


[---- 181----]
no truncate : 198
len tokens : 198 (155)


Your max_length is set to 350, but your input_length is only 238. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=119)


[---- 182----]
no truncate : 236
len tokens : 236 (173)


Your max_length is set to 350, but your input_length is only 269. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=134)


[---- 183----]
no truncate : 267
len tokens : 267 (202)


Your max_length is set to 350, but your input_length is only 212. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=106)


[---- 184----]
no truncate : 210
len tokens : 210 (159)


Your max_length is set to 350, but your input_length is only 282. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=141)


[---- 185----]
no truncate : 280
len tokens : 280 (246)


Your max_length is set to 350, but your input_length is only 284. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=142)


[---- 186----]
no truncate : 282
len tokens : 282 (251)


Your max_length is set to 350, but your input_length is only 111. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=55)


[---- 187----]
no truncate : 109
len tokens : 109 (98)


Your max_length is set to 350, but your input_length is only 93. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=46)


[---- 188----]
no truncate : 91
len tokens : 91 (72)


Your max_length is set to 350, but your input_length is only 296. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=148)


[---- 189----]
no truncate : 294
len tokens : 294 (260)


Your max_length is set to 350, but your input_length is only 172. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=86)


[---- 190----]
no truncate : 170
len tokens : 170 (146)


Your max_length is set to 350, but your input_length is only 266. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=133)


[---- 191----]
no truncate : 264
len tokens : 264 (230)


Your max_length is set to 350, but your input_length is only 215. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=107)


[---- 192----]
no truncate : 213
len tokens : 213 (192)


Your max_length is set to 350, but your input_length is only 244. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=122)


[---- 193----]
no truncate : 242
len tokens : 242 (215)


Your max_length is set to 350, but your input_length is only 279. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=139)


[---- 194----]
no truncate : 277
len tokens : 277 (243)


Your max_length is set to 350, but your input_length is only 251. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=125)


[---- 195----]
no truncate : 249
len tokens : 249 (222)
[---- 196----]
no truncate : 354
len tokens : 354 (314)
[---- 197----]
no truncate : 384
len tokens : 384 (268)


Your max_length is set to 350, but your input_length is only 276. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=138)


[---- 198----]
no truncate : 274
len tokens : 274 (190)


Your max_length is set to 350, but your input_length is only 282. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=141)


[---- 199----]
no truncate : 280
len tokens : 280 (196)


Your max_length is set to 350, but your input_length is only 276. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=138)


[---- 200----]
no truncate : 274
len tokens : 274 (190)


Your max_length is set to 350, but your input_length is only 275. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=137)


[---- 201----]
no truncate : 273
len tokens : 273 (191)


Your max_length is set to 350, but your input_length is only 244. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=122)


[---- 202----]
no truncate : 242
len tokens : 242 (174)


Your max_length is set to 350, but your input_length is only 241. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=120)


[---- 203----]
no truncate : 239
len tokens : 239 (168)


Your max_length is set to 350, but your input_length is only 193. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=96)


[---- 204----]
no truncate : 191
len tokens : 191 (136)


Your max_length is set to 350, but your input_length is only 205. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=102)


[---- 205----]
no truncate : 203
len tokens : 203 (150)


Your max_length is set to 350, but your input_length is only 240. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=120)


[---- 206----]
no truncate : 238
len tokens : 238 (170)


Your max_length is set to 350, but your input_length is only 199. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=99)


[---- 207----]
no truncate : 197
len tokens : 197 (142)


Your max_length is set to 350, but your input_length is only 106. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=53)


[---- 208----]
no truncate : 104
len tokens : 104 (76)


Your max_length is set to 350, but your input_length is only 106. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=53)


[---- 209----]
no truncate : 104
len tokens : 104 (76)


Your max_length is set to 350, but your input_length is only 123. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=61)


[---- 210----]
no truncate : 121
len tokens : 121 (88)


Your max_length is set to 350, but your input_length is only 99. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=49)


[---- 211----]
no truncate : 97
len tokens : 97 (76)


Your max_length is set to 350, but your input_length is only 88. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=44)


[---- 212----]
no truncate : 86
len tokens : 86 (65)


Your max_length is set to 350, but your input_length is only 85. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=42)


[---- 213----]
no truncate : 83
len tokens : 83 (62)


Your max_length is set to 350, but your input_length is only 111. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=55)


[---- 214----]
no truncate : 109
len tokens : 109 (75)


Your max_length is set to 350, but your input_length is only 100. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)


[---- 215----]
no truncate : 98
len tokens : 98 (72)


Your max_length is set to 350, but your input_length is only 107. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=53)


[---- 216----]
no truncate : 105
len tokens : 105 (75)


Your max_length is set to 350, but your input_length is only 98. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=49)


[---- 217----]
no truncate : 96
len tokens : 96 (70)


Your max_length is set to 350, but your input_length is only 122. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=61)


[---- 218----]
no truncate : 120
len tokens : 120 (86)


Your max_length is set to 350, but your input_length is only 130. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=65)


[---- 219----]
no truncate : 128
len tokens : 128 (93)


Your max_length is set to 350, but your input_length is only 98. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=49)


[---- 220----]
no truncate : 96
len tokens : 96 (67)


Your max_length is set to 350, but your input_length is only 94. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=47)


[---- 221----]
no truncate : 92
len tokens : 92 (69)


Your max_length is set to 350, but your input_length is only 108. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=54)


[---- 222----]
no truncate : 106
len tokens : 106 (77)


Your max_length is set to 350, but your input_length is only 91. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=45)


[---- 223----]
no truncate : 89
len tokens : 89 (62)


Your max_length is set to 350, but your input_length is only 73. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=36)


[---- 224----]
no truncate : 71
len tokens : 71 (56)


Your max_length is set to 350, but your input_length is only 78. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=39)


[---- 225----]
no truncate : 76
len tokens : 76 (58)


Your max_length is set to 350, but your input_length is only 129. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=64)


[---- 226----]
no truncate : 127
len tokens : 127 (81)


Your max_length is set to 350, but your input_length is only 110. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=55)


[---- 227----]
no truncate : 108
len tokens : 108 (70)


Your max_length is set to 350, but your input_length is only 79. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=39)


[---- 228----]
no truncate : 77
len tokens : 77 (51)


Your max_length is set to 350, but your input_length is only 129. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=64)


[---- 229----]
no truncate : 127
len tokens : 127 (83)


Your max_length is set to 350, but your input_length is only 70. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=35)


[---- 230----]
no truncate : 68
len tokens : 68 (43)


Your max_length is set to 350, but your input_length is only 106. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=53)


[---- 231----]
no truncate : 104
len tokens : 104 (68)


Your max_length is set to 350, but your input_length is only 301. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=150)


[---- 232----]
no truncate : 299
len tokens : 299 (248)
[---- 233----]
no truncate : 350
len tokens : 350 (290)
[---- 234----]
no truncate : 354
len tokens : 354 (291)


Your max_length is set to 350, but your input_length is only 348. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=174)


[---- 235----]
no truncate : 346
len tokens : 346 (286)


Your max_length is set to 350, but your input_length is only 172. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=86)


[---- 236----]
no truncate : 170
len tokens : 170 (136)
[---- 237----]
no truncate : 450
len tokens : 450 (364)


Your max_length is set to 350, but your input_length is only 72. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=36)


[---- 238----]
no truncate : 70
len tokens : 70 (45)


Your max_length is set to 350, but your input_length is only 127. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=63)


[---- 239----]
no truncate : 125
len tokens : 125 (79)


Your max_length is set to 350, but your input_length is only 78. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=39)


[---- 240----]
no truncate : 76
len tokens : 76 (51)


Your max_length is set to 350, but your input_length is only 115. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=57)


[---- 241----]
no truncate : 113
len tokens : 113 (80)


Your max_length is set to 350, but your input_length is only 80. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)


[---- 242----]
no truncate : 78
len tokens : 78 (52)


Your max_length is set to 350, but your input_length is only 115. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=57)


[---- 243----]
no truncate : 113
len tokens : 113 (80)


In [28]:
r1, r2, rl = get_rouge_l(sums, refs)
print("r1 : " + str(r1) + " | r2 : " + str(r2) +" | rl : " + str(rl))

r1 : 0.2527887218842651 | r2 : 0.05717584641429692 | rl : 0.18633243849651931


In [None]:
sums[2]

In [None]:
processed_data[2]

In [None]:
refs[1]

'to save time , speaker mn005 will only mark the sample of transcribed data for regions of overlapping speech , as opposed to marking all acoustic events . the digits extraction task will be delegated to whomever is working on acoustics for the meeting recorder project .'