In [None]:
!pip install tensorboardX
!pip install transformers -q
!pip install sentencepiece

In [None]:
!pip install  datasets  rouge-score nltk
from datasets import load_dataset, load_metric
import nltk
nltk.download('punkt')
metric = load_metric("rouge")

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler


device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Dataset Class for AMI corpus 

In [None]:
class AmiMeetingDataset(Dataset):
  """Unpadded, list-valued tokenizations of (meeting transcript, summary) pairs.

  Args:
    data: object with `meeting` and `Summary` attributes that support `len()`
      and `[idx]` lookup (e.g. the columns of a pandas DataFrame).
    tokenizer: callable tokenizer that also offers `as_target_tokenizer()`.
    source_len: `max_length` used when truncating a transcript.
    summary_len: `max_length` used when truncating a summary.
  """

  def __init__(self,data, tokenizer,source_len,summary_len):
    super(AmiMeetingDataset,self).__init__()
    self.tokenizer = tokenizer
    self.data = data
    self.source_len  = source_len
    self.summary_len = summary_len

    self.document = self.data.meeting
    self.summary = self.data.Summary

  def __len__(self):
    """Number of transcript/summary pairs."""
    return len(self.document)

  def __getitem__(self,idx):
    """Return `input_ids`, `attention_mask` and `labels` for pair `idx`."""
    # Collapse every run of whitespace (including newlines) into one space.
    text = " ".join(str(self.document[idx]).split())
    summ = " ".join(str(self.summary[idx]).split())

    source = self.tokenizer(text, max_length=self.source_len, truncation=True)

    # Use the tokenizer handed to the constructor. The previous code reached
    # for a module-level `tokenizer`, which may be undefined or a different
    # object than the one this dataset was built with.
    with self.tokenizer.as_target_tokenizer():
      labels = self.tokenizer(summ, max_length=self.summary_len, truncation=True)

    # No padding or tensor conversion is requested above, so the values are
    # returned exactly as the tokenizer produced them.
    return {
        'input_ids': source['input_ids'],
        'attention_mask': source['attention_mask'],
        'labels': labels['input_ids']
    }


  

In [None]:
class AmiMeetingDatasetVAL(Dataset):
  """Validation variant of the AMI dataset: yields only `input_ids` and `labels`.

  Args:
    data: object with `meeting` and `Summary` attributes that support `len()`
      and `[idx]` lookup (e.g. the columns of a pandas DataFrame).
    tokenizer: callable tokenizer that also offers `as_target_tokenizer()`.
    source_len: `max_length` used when truncating a transcript.
    summary_len: `max_length` used when truncating a summary.
  """

  def __init__(self,data, tokenizer,source_len,summary_len):
    super(AmiMeetingDatasetVAL,self).__init__()
    self.tokenizer = tokenizer
    self.data = data
    self.source_len  = source_len
    self.summary_len = summary_len

    self.document = self.data.meeting
    self.summary = self.data.Summary

  def __len__(self):
    """Number of transcript/summary pairs."""
    return len(self.document)

  def __getitem__(self,idx):
    """Return the token ids of transcript `idx` and of its reference summary."""
    # Collapse every run of whitespace (including newlines) into one space.
    text = " ".join(str(self.document[idx]).split())
    summ = " ".join(str(self.summary[idx]).split())

    source = self.tokenizer(text, max_length=self.source_len, truncation=True)

    # Use the tokenizer handed to the constructor rather than a module-level
    # `tokenizer`, which may be undefined or a different object.
    with self.tokenizer.as_target_tokenizer():
      labels = self.tokenizer(summ, max_length=self.summary_len, truncation=True)

    # Unlike AmiMeetingDataset-style training items, no attention mask is
    # returned here.
    return {
        'input_ids': source['input_ids'],
        'labels': labels['input_ids']
    }


  

In [None]:
# Read the train / validation / test splits from the Colab working directory.
train_df, valid_df, test_df = map(
    pd.read_csv,
    ("/content/Train_data.csv", "/content/Valid_data.csv", "/content/Test_data.csv"),
)

# T5 *Pytorch_Lightning*

In [None]:
!pip install -q transformers==4.5.0
!pip install -q pytorch_lightning==1.2.7

In [None]:
import pandas as pd 
import numpy as np
import json
import torch
import pytorch_lightning as pl
from pathlib import Path
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

import textwrap
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AdamW,
    T5ForConditionalGeneration,
    T5TokenizerFast as T5Tokenizer
)

from tqdm.auto import tqdm

In [None]:
pl.seed_everything(42)

In [None]:
# Read the train / validation / test splits (paths relative to the working directory).
train_df, valid_df, test_df = map(
    pd.read_csv,
    ("Train_data.csv", "Valid_data.csv", "Test_data.csv"),
)

In [None]:
class AmiMeetingDataset(Dataset):
  """Padded tensor encodings of (meeting transcript, summary) pairs.

  Args:
    data: object with `meeting` and `Summary` attributes that support `len()`
      and `[idx]` lookup (e.g. the columns of a pandas DataFrame).
    tokenizer: callable tokenizer that returns `input_ids` and
      `attention_mask` tensors when called with `return_tensors="pt"`.
    source_len: length every transcript is padded / truncated to.
    summary_len: length every summary is padded / truncated to.
  """

  def __init__(self,data, tokenizer,source_len,summary_len):
    super(AmiMeetingDataset,self).__init__()
    self.tokenizer = tokenizer
    self.data = data
    self.source_len  = source_len
    self.summary_len = summary_len

    self.document = self.data.meeting
    self.summary = self.data.Summary

  def __len__(self):
    """Number of transcript/summary pairs."""
    return len(self.document)

  def _encode(self, text, max_length):
    """Tokenize `text` to exactly `max_length` positions as "pt" tensors."""
    return self.tokenizer(
        text,
        max_length = max_length,
        padding = "max_length",
        truncation = True,
        return_attention_mask = True,
        add_special_tokens = True,
        return_tensors = "pt"
    )

  def __getitem__(self,idx):
    """Return the cleaned texts plus flattened ids, masks and labels for `idx`."""
    # Collapse every run of whitespace (including newlines) into one space.
    text = " ".join(str(self.document[idx]).split())
    summ = " ".join(str(self.summary[idx]).split())

    source = self._encode(text, self.source_len)
    target = self._encode(summ, self.summary_len)

    # Padding positions in the labels are replaced with -100. The pad id is
    # taken from the tokenizer instead of assuming 0; 0 stays the fallback
    # for tokenizers that do not report one.
    pad_id = getattr(self.tokenizer, "pad_token_id", None)
    if pad_id is None:
      pad_id = 0
    labels = target["input_ids"].masked_fill(target["input_ids"] == pad_id, -100)

    return dict(
        text = text,
        summary = summ,
        text_input_ids = source["input_ids"].flatten(),
        text_attention_mask = source["attention_mask"].flatten(),
        labels = labels.flatten(),
        labels_attention_mask = target["attention_mask"].flatten()
    )


  

In [None]:
class AMIMeetingDataModule(pl.LightningDataModule):
  """Lightning data module that wraps the AMI splits in `AmiMeetingDataset`.

  Args:
    train_df: training split, passed to `AmiMeetingDataset` as `data`.
    valid_df: validation split, passed to `AmiMeetingDataset` as `data`.
    tokenizer: tokenizer forwarded to every dataset.
    batch_size: batch size used by all dataloaders.
    source_max_len: maximum transcript length in tokens.
    target_max_len: maximum summary length in tokens.
    test_df: optional held-out split. When omitted, `test_dataloader` keeps
      serving the validation split, as it always did before.
    num_workers: worker processes per dataloader (defaults to the previously
      hard-coded value of 2).
  """

  def __init__(self,train_df,valid_df,tokenizer,batch_size,source_max_len,target_max_len,
               test_df=None,num_workers=2):

    super().__init__()

    self.train_df = train_df
    self.valid_df = valid_df
    self.test_df = test_df
    self.tokenizer = tokenizer
    self.batch_size = batch_size
    self.source_max = source_max_len
    self.target_max = target_max_len
    self.num_workers = num_workers

  def _build_dataset(self, frame):
    """Wrap one split in an `AmiMeetingDataset` with the shared settings."""
    return AmiMeetingDataset(
        frame,
        self.tokenizer,
        self.source_max,
        self.target_max
    )

  def _build_loader(self, dataset, shuffle):
    """Create a dataloader with the module-wide batch size and worker count."""
    return DataLoader(
        dataset,
        batch_size = self.batch_size,
        shuffle = shuffle,
        num_workers = self.num_workers
    )

  def setup(self,stage=None):
    """Instantiate the datasets; `stage` is accepted but not used."""
    self.train_dataset = self._build_dataset(self.train_df)
    self.valid_dataset = self._build_dataset(self.valid_df)
    # Without an explicit test split, fall back to the validation data so
    # existing callers see unchanged behaviour.
    if self.test_df is None:
      self.test_dataset = self.valid_dataset
    else:
      self.test_dataset = self._build_dataset(self.test_df)

  def train_dataloader(self):
    """Shuffled loader over the training split."""
    return self._build_loader(self.train_dataset, shuffle = True)

  def val_dataloader(self):
    """Ordered loader over the validation split."""
    return self._build_loader(self.valid_dataset, shuffle = False)

  def test_dataloader(self):
    """Ordered loader over the test split (validation split if none was given)."""
    return self._build_loader(self.test_dataset, shuffle = False)


In [None]:
model_name = "t5-base"

tokenizer = T5Tokenizer.from_pretrained(model_name)

In [None]:
epochs = 3 
batch_size = 4

data_module = AMIMeetingDataModule(train_df,valid_df,tokenizer,batch_size=batch_size,source_max_len=512,target_max_len=128)

In [None]:
class SummModel(pl.LightningModule):
  """LightningModule that fine-tunes `T5ForConditionalGeneration` for summarization.

  The pretrained weights are loaded from the notebook-level `model_name`.
  """

  def __init__(self):
    super().__init__()
    self.model = T5ForConditionalGeneration.from_pretrained(model_name,return_dict =True)

  def forward(self,input_ids,attention_mask,decoder_attention_mask = None,labels = None):
    """Run the wrapped model and return `(loss, logits)`.

    Args:
      input_ids: encoder token ids.
      attention_mask: encoder attention mask.
      decoder_attention_mask: attention mask for the target tokens; optional.
      labels: target token ids; optional.
    """
    out = self.model(
        input_ids,
        attention_mask = attention_mask,
        labels = labels,
        decoder_attention_mask = decoder_attention_mask
    )

    return out.loss,out.logits

  def _shared_step(self, batch, stage):
    """Compute the loss for one batch and log it as `<stage>_loss`."""
    loss, _ = self(
        input_ids = batch["text_input_ids"],
        attention_mask = batch["text_attention_mask"],
        decoder_attention_mask = batch["labels_attention_mask"],
        labels = batch["labels"]
    )

    self.log(f"{stage}_loss",loss,prog_bar =True,logger =True)
    return loss

  def training_step(self,batch,batch_idx):
    """Return the training loss (logged as `train_loss`)."""
    return self._shared_step(batch, "train")

  def validation_step(self,batch,batch_idx):
    """Return the validation loss (logged as `val_loss`)."""
    return self._shared_step(batch, "val")

  def test_step(self,batch,batch_idx):
    """Return the test loss (logged as `test_loss`)."""
    return self._shared_step(batch, "test")

  def configure_optimizers(self):
    """AdamW over all parameters with a fixed learning rate of 5e-4."""
    return AdamW(self.parameters(),lr = 5e-4)




In [None]:
model = SummModel()

In [None]:
%load_ext tensorboard
%tensorboard --logdir ./lightning_logs

In [None]:
# Keep only the checkpoint with the lowest validation loss.
checkpoint_callback = ModelCheckpoint(
    dirpath = "checkpoints",
    filename = "best-checkpoint",
    save_top_k= 1 ,
    verbose = True,
    monitor = "val_loss",
    mode = "min"
)

# The directory must match `%tensorboard --logdir ./lightning_logs`; it used to
# be misspelled "ligthning_logs", so TensorBoard was watching an empty folder.
logger = TensorBoardLogger("lightning_logs",name = "AMI meeting Summary")

trainer = pl.Trainer(
    logger = logger,
    # ModelCheckpoint instances belong in `callbacks`; `trainer.checkpoint_callback`
    # still resolves to this object afterwards.
    callbacks = [checkpoint_callback],
    max_epochs = epochs,
    # Use one GPU when available instead of failing on CPU-only runtimes.
    gpus = 1 if torch.cuda.is_available() else None,
    progress_bar_refresh_rate = 30
)

In [None]:
trainer.fit(model,data_module)

In [None]:
trained_model = SummModel.load_from_checkpoint(
    trainer.checkpoint_callback.best_model_path
)

trained_model.freeze()

In [None]:
def summarize(text):
  """Generate a summary of `text` with the notebook-level `trained_model`.

  The text is tokenized with the notebook-level `tokenizer` (padded / truncated
  to 512 tokens) and decoded with 2-beam search limited to 150 tokens.

  Args:
    text: the meeting transcript to summarize.

  Returns:
    The decoded generated sequences concatenated into a single string.
  """
  source = tokenizer(
      text,
      max_length = 512,
      padding = "max_length",
      add_special_tokens = True,
      truncation = True,
      return_attention_mask =True,
      return_tensors = "pt"
  )

  # The tokenizer output and the model weights must be on the same device,
  # so follow the model wherever it currently lives.
  device = trained_model.model.device

  gen_ids = trained_model.model.generate(
      input_ids = source["input_ids"].to(device),
      attention_mask = source["attention_mask"].to(device),
      max_length = 150,
      num_beams = 2,
      repetition_penalty = 2.5,
      length_penalty = 1.0,
      early_stopping = True
  )

  preds = [
           tokenizer.decode(gen_id , skip_special_tokens=True,clean_up_tokenization_spaces=True)
           for gen_id in gen_ids
  ]

  return "".join(preds)

In [None]:
# Summarize every test meeting and pair each prediction with its reference.
meetings = test_df.meeting.tolist()
actual = test_df.Summary.tolist()
preds = [summarize(meeting) for meeting in meetings]

my_dict = dict(preds=preds, actual=actual)

dfs = pd.DataFrame(my_dict)

In [None]:
metric.compute(predictions = dfs.preds.tolist(),references=dfs.actual.tolist())

In [None]:
dfs.to_csv("Predictions_T5_3.csv")

# Different Models(BART) using Huggingface example code(Results at end)

In [None]:
!git clone https://github.com/huggingface/transformers.git

Cloning into 'transformers'...
remote: Enumerating objects: 72664, done.
remote: Counting objects: 100% (700/700), done.
remote: Compressing objects: 100% (365/365), done.
remote: Total 72664 (delta 400), reused 505 (delta 295), pack-reused 71964
Receiving objects: 100% (72664/72664), 56.10 MiB | 29.32 MiB/s, done.
Resolving deltas: 100% (51536/51536), done.


In [None]:
!pip install git+https://github.com/huggingface/transformers

In [None]:
cd transformers

/content/transformers


In [None]:
!python examples/pytorch/summarization/run_summarization.py \
    --model_name_or_path facebook/bart-large-xsum  \
    --do_train \
    --do_eval \
    --train_file Train_data.csv \
    --validation_file Valid_data.csv \
    --output_dir ./bart-large-5 \
    --overwrite_output_dir \
    --per_device_train_batch_size=4 \
    --per_device_eval_batch_size=4 \
    --predict_with_generate  \
    --num_train_epochs=5 \
    --max_source_length=1024 \
    --max_target_length=128  \

# Pegasus

In [None]:

from transformers import PegasusForConditionalGeneration, PegasusTokenizer, Trainer, TrainingArguments
import torch
import pandas as pd
import numpy as np

class PegasusDataset(torch.utils.data.Dataset):
    """Dataset over pre-tokenized inputs and labels.

    Args:
        encodings: mapping from field name to a per-example sequence
            (every field is converted to a tensor in `__getitem__`).
        labels: mapping whose `'input_ids'` entry holds the per-example
            target token ids.
    """
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    def __getitem__(self, idx):
        """Return the tensors of example `idx`, with the target ids under `'labels'`."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels['input_ids'][idx])
        return item
    def __len__(self):
        """Number of examples."""
        # `len(self.labels)` would count the mapping's keys
        # ('input_ids', 'attention_mask', ...), not the examples.
        return len(self.labels['input_ids'])

      
def prepare_data(model_name, 
                 train_texts, train_labels, 
                 val_texts=None, val_labels=None, 
                 test_texts=None, test_labels=None):
  """
  Prepare input data for model fine-tuning.

  Loads the Pegasus tokenizer for `model_name` and tokenizes each split that
  was supplied with both texts and labels.

  Args:
    model_name: name or path given to `PegasusTokenizer.from_pretrained`.
    train_texts, train_labels: lists of source texts and target summaries.
    val_texts, val_labels: optional validation split; used only if both are given.
    test_texts, test_labels: optional test split; used only if both are given.

  Returns:
    `(train_dataset, val_dataset, test_dataset, tokenizer)`; a dataset is
    `None` when its split was not supplied.
  """
  tokenizer = PegasusTokenizer.from_pretrained(model_name)
  pad_id = tokenizer.pad_token_id

  prepare_val = val_texts is not None and val_labels is not None
  prepare_test = test_texts is not None and test_labels is not None

  def tokenize_data(texts, labels):
    """Tokenize one split, padding each side to its own longest sequence."""
    encodings = tokenizer(texts, truncation=True, padding=True)
    decodings = tokenizer(labels, truncation=True, padding=True)
    # Replace padding in the targets with -100, the same convention the T5
    # dataset in this notebook applies to its labels.
    if pad_id is not None:
      decodings['input_ids'] = [
        [-100 if token == pad_id else token for token in sequence]
        for sequence in decodings['input_ids']
      ]
    return PegasusDataset(encodings, decodings)

  train_dataset = tokenize_data(train_texts, train_labels)
  val_dataset = tokenize_data(val_texts, val_labels) if prepare_val else None
  test_dataset = tokenize_data(test_texts, test_labels) if prepare_test else None

  return train_dataset, val_dataset, test_dataset, tokenizer


def prepare_fine_tuning(model_name, tokenizer, train_dataset, val_dataset=None, freeze_encoder=False, output_dir='./results'):
  """
  Load a Pegasus model and wrap it, with fixed hyper-parameters, in a `Trainer`.

  Args:
    model_name: name or path given to `PegasusForConditionalGeneration.from_pretrained`.
    tokenizer: tokenizer handed to the `Trainer`.
    train_dataset: training dataset.
    val_dataset: optional evaluation dataset; when given, evaluation runs
      every 100 steps.
    freeze_encoder: if true, encoder parameters get `requires_grad = False`.
    output_dir: directory passed to `TrainingArguments`.

  Returns:
    The configured `Trainer` (training is not started here).
  """
  target_device = 'cuda' if torch.cuda.is_available() else 'cpu'
  model = PegasusForConditionalGeneration.from_pretrained(model_name).to(target_device)

  if freeze_encoder:
    for weight in model.model.encoder.parameters():
      weight.requires_grad = False

  # Settings shared by runs with and without a validation set.
  arg_kwargs = dict(
    output_dir=output_dir,           # output directory
    num_train_epochs=2000,           # total number of training epochs
    per_device_train_batch_size=1,   # batch size per device during training
    save_steps=500,                  # update steps between checkpoint saves
    save_total_limit=5,              # maximum number of checkpoints kept
    warmup_steps=500,                # warmup steps for the learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
  )

  # Evaluation-specific settings are added only when there is data to evaluate on.
  if val_dataset is not None:
    arg_kwargs.update(
      per_device_eval_batch_size=1,    # batch size for evaluation
      evaluation_strategy='steps',     # evaluate on a step schedule
      eval_steps=100,                  # update steps between evaluations
    )

  return Trainer(
    model=model,
    args=TrainingArguments(**arg_kwargs),
    train_dataset=train_dataset,
    eval_dataset=val_dataset,          # None when no validation set was given
    tokenizer=tokenizer
  )

if __name__=='__main__':

  # use Pegasus Large model as base for fine-tuning
  train_df = pd.read_csv("Train_data.csv")
  valid_df = pd.read_csv("Valid_data.csv")
  model_name = 'google/pegasus-large'
  # A single call tokenizes both splits, so the tokenizer is loaded once
  # instead of once per split.
  train_dataset, valid_dataset, _, tokenizer = prepare_data(
      model_name,
      train_df.meeting.tolist(), train_df.Summary.tolist(),
      val_texts=valid_df.meeting.tolist(), val_labels=valid_df.Summary.tolist())
  trainer = prepare_fine_tuning(model_name, tokenizer, train_dataset,valid_dataset)
  trainer.train()

In [None]:
!nvidia-smi

Sun May 16 14:06:57 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   53C    P8    10W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# INFERENCE CODE

In [None]:
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import pandas as pd

# Batch inference with a fine-tuned seq2seq checkpoint; writes predictions next
# to their reference summaries.
test_data = pd.read_csv("Test_data.csv")

# Only the rows from position 10 onward are summarized in this run.
# NOTE(review): the results cells also read "predictions.csv", which this cell
# does not write — presumably produced by a separate run over the first 10 rows; confirm.
src_text = test_data.meeting.tolist()[10:] 

#print(len(src_text))
# Local checkpoint directory; the name matches the --output_dir of the
# run_summarization command used for BART in this notebook.
model_name = "bart-large-5"
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Pegasus-specific loading, kept for switching models by hand:
#tokenizer = PegasusTokenizer.from_pretrained(model_name)
#model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model =  AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
# All selected texts are tokenized and generated as one batch, padded to the longest.
batch = tokenizer(src_text, truncation=True, padding='longest', return_tensors="pt").to(device)
translated = model.generate(**batch)
tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)


# The references use the same [10:] slice so rows stay aligned with the predictions.
my_dict = {
    "preds" : tgt_text,
    "actual": test_data.Summary.tolist()[10:]
}

df = pd.DataFrame(my_dict)
df.to_csv("predictions_next.csv")


# Inference *RESULTS* For Different Models


Pegasus(Checkpoint 2000)

In [None]:
# Load the two prediction files and stack them into one frame for scoring.
# NOTE(review): every results cell reads these same two file names, so the
# files on disk must belong to the run named in the heading above — confirm.
pred1, pred2 = map(pd.read_csv, ("predictions.csv", "predictions_next.csv"))
join = [pred1, pred2]
df_joined = pd.concat(join)

In [None]:
len(df_joined)

20

In [None]:
metric.compute(predictions = df_joined.preds.tolist(),references=df_joined.actual.tolist())

{'rouge1': AggregateScore(low=Score(precision=0.40439701877823314, recall=0.38630733460681654, fmeasure=0.3937626430485465), mid=Score(precision=0.4444366393125837, recall=0.4237678142916943, fmeasure=0.42624927062594753), high=Score(precision=0.48068321988116774, recall=0.4625331467549687, fmeasure=0.45906982125913903)),
 'rouge2': AggregateScore(low=Score(precision=0.12458523703290367, recall=0.12073949480832438, fmeasure=0.12098769475005403), mid=Score(precision=0.15433596561299567, recall=0.1490883644094902, fmeasure=0.14983723438068564), high=Score(precision=0.18997105872123438, recall=0.1813915473923218, fmeasure=0.1839126798822726)),
 'rougeL': AggregateScore(low=Score(precision=0.23392570618320602, recall=0.22357771669528104, fmeasure=0.2267561221269161), mid=Score(precision=0.2570639066482677, recall=0.2485966011135103, fmeasure=0.24852690345716782), high=Score(precision=0.2847828492216706, recall=0.27456540082606723, fmeasure=0.27219113738748474)),
 'rougeLsum': AggregateScor

Pegasus(checkpoint 500)

In [None]:
# Load the two prediction files and stack them into one frame for scoring.
# NOTE(review): every results cell reads these same two file names, so the
# files on disk must belong to the run named in the heading above — confirm.
pred1, pred2 = map(pd.read_csv, ("predictions.csv", "predictions_next.csv"))
join = [pred1, pred2]
df_joined = pd.concat(join)

print(len(df_joined))

20


In [None]:
metric.compute(predictions = df_joined.preds.tolist(),references=df_joined.actual.tolist())

{'rouge1': AggregateScore(low=Score(precision=0.40211277615885865, recall=0.39234667365282516, fmeasure=0.397605136899324), mid=Score(precision=0.43421213741803166, recall=0.4269870354465292, fmeasure=0.425449913878605), high=Score(precision=0.46671674011468645, recall=0.4607441850197499, fmeasure=0.4520415536233954)),
 'rouge2': AggregateScore(low=Score(precision=0.1213129388243323, recall=0.12117196321727253, fmeasure=0.12037421687772198), mid=Score(precision=0.14781681839745286, recall=0.14550699052105232, fmeasure=0.14521575661738373), high=Score(precision=0.17696585317985492, recall=0.1699515278250861, fmeasure=0.17097294905866073)),
 'rougeL': AggregateScore(low=Score(precision=0.23003516973343985, recall=0.2271096641154576, fmeasure=0.2293651565691293), mid=Score(precision=0.25235416905945407, recall=0.25030817891351265, fmeasure=0.24875347871846643), high=Score(precision=0.2781784928682444, recall=0.27372597716911284, fmeasure=0.27010109520837977)),
 'rougeLsum': AggregateScore

Pegasus(Checkpoint 1500)

In [None]:
# Load the two prediction files and stack them into one frame for scoring.
# NOTE(review): every results cell reads these same two file names, so the
# files on disk must belong to the run named in the heading above — confirm.
pred1, pred2 = map(pd.read_csv, ("predictions.csv", "predictions_next.csv"))
join = [pred1, pred2]
df_joined = pd.concat(join)

print(len(df_joined))

20


In [None]:
metric.compute(predictions = df_joined.preds.tolist(),references=df_joined.actual.tolist())

{'rouge1': AggregateScore(low=Score(precision=0.406699465144985, recall=0.38282560762996654, fmeasure=0.39488096221208757), mid=Score(precision=0.4430835774347117, recall=0.4192634558031275, fmeasure=0.4257134687893359), high=Score(precision=0.4827872825118298, recall=0.4568455613621107, fmeasure=0.4586526166012035)),
 'rouge2': AggregateScore(low=Score(precision=0.1298468056918049, recall=0.1249140826893274, fmeasure=0.12582238593977754), mid=Score(precision=0.1590850174160227, recall=0.15163412024877954, fmeasure=0.15399441035493955), high=Score(precision=0.19682900898743136, recall=0.1839056341745951, fmeasure=0.1884435258306613)),
 'rougeL': AggregateScore(low=Score(precision=0.2372090792494723, recall=0.22263049808985003, fmeasure=0.22804472301095652), mid=Score(precision=0.258937481798605, recall=0.24651447523816974, fmeasure=0.24994804432493223), high=Score(precision=0.2896825262701912, recall=0.27139490235137764, fmeasure=0.27352524796502065)),
 'rougeLsum': AggregateScore(low=

BART LARGE-XSUM MODEL EPOCHS 3

In [None]:
# Load the two prediction files and stack them into one frame for scoring.
# NOTE(review): every results cell reads these same two file names, so the
# files on disk must belong to the run named in the heading above — confirm.
pred1, pred2 = map(pd.read_csv, ("predictions.csv", "predictions_next.csv"))
join = [pred1, pred2]
df_joined = pd.concat(join)

print(len(df_joined))

20


In [None]:
metric.compute(predictions = df_joined.preds.tolist(),references=df_joined.actual.tolist())

{'rouge1': AggregateScore(low=Score(precision=0.5746671827735507, recall=0.19057381286383798, fmeasure=0.28595163965380466), mid=Score(precision=0.6054803684635222, recall=0.21178198469275838, fmeasure=0.3107321986264726), high=Score(precision=0.640777504464937, recall=0.2333606758246301, fmeasure=0.33676334909990996)),
 'rouge2': AggregateScore(low=Score(precision=0.23071877474986124, recall=0.0778220005262467, fmeasure=0.11661006295900479), mid=Score(precision=0.2681107409098134, recall=0.0926059364967537, fmeasure=0.13671023657264827), high=Score(precision=0.304030092745855, recall=0.10854956868746374, fmeasure=0.1571748869784799)),
 'rougeL': AggregateScore(low=Score(precision=0.4018138514101452, recall=0.13379287342769194, fmeasure=0.2002756288395219), mid=Score(precision=0.4336514513239581, recall=0.15212434148001341, fmeasure=0.2234465283657594), high=Score(precision=0.4708245299885798, recall=0.17100546840634606, fmeasure=0.246767064520745)),
 'rougeLsum': AggregateScore(low=Sc

BART LARGE-XSUM MODEL EPOCHS 5

In [None]:
# Load the two prediction files and stack them into one frame for scoring.
# NOTE(review): every results cell reads these same two file names, so the
# files on disk must belong to the run named in the heading above — confirm.
pred1, pred2 = map(pd.read_csv, ("predictions.csv", "predictions_next.csv"))
join = [pred1, pred2]
df_joined = pd.concat(join)

print(len(df_joined))

20


In [None]:
metric.compute(predictions = df_joined.preds.tolist(),references=df_joined.actual.tolist())

{'rouge1': AggregateScore(low=Score(precision=0.5278100195294285, recall=0.17254989359927053, fmeasure=0.2605829458817205), mid=Score(precision=0.5695345083320745, recall=0.19298840811009993, fmeasure=0.2853467377800024), high=Score(precision=0.6223712381940615, recall=0.22118575023977785, fmeasure=0.3197053993817845)),
 'rouge2': AggregateScore(low=Score(precision=0.189251483574894, recall=0.06289894563474789, fmeasure=0.09427330896780932), mid=Score(precision=0.2356845420061302, recall=0.0785780448530408, fmeasure=0.11645810679746713), high=Score(precision=0.2951388453150923, recall=0.10719152450778976, fmeasure=0.1534538532008188)),
 'rougeL': AggregateScore(low=Score(precision=0.3640071998847442, recall=0.11990640700539144, fmeasure=0.18054519244975445), mid=Score(precision=0.40377038201112636, recall=0.1382703340090887, fmeasure=0.20361809431037303), high=Score(precision=0.4509006948720759, recall=0.16229406104793925, fmeasure=0.23341003215273992)),
 'rougeLsum': AggregateScore(lo

T5(epoch 5 ,gradient clip at 0.5)

In [None]:
df = pd.read_csv("/content/Predictions_T5_5_Sclip.csv")

In [None]:
metric.compute(predictions = df.preds.tolist(),references=df.actual.tolist())

{'rouge1': AggregateScore(low=Score(precision=0.4595323283108672, recall=0.29502399485672376, fmeasure=0.3586385758622941), mid=Score(precision=0.4954428361948009, recall=0.3231773589633429, fmeasure=0.3866229151874762), high=Score(precision=0.5244657927718478, recall=0.354457017807162, fmeasure=0.4132528945009049)),
 'rouge2': AggregateScore(low=Score(precision=0.15301713106698206, recall=0.09943085744505108, fmeasure=0.1191041949933987), mid=Score(precision=0.18297866228652954, recall=0.11983030282632462, fmeasure=0.1429878003861625), high=Score(precision=0.21128559317167508, recall=0.14325976052255787, fmeasure=0.16559942745104897)),
 'rougeL': AggregateScore(low=Score(precision=0.2779882853457587, recall=0.17849925347339185, fmeasure=0.21764326508974954), mid=Score(precision=0.3019269133345725, recall=0.19864709337554576, fmeasure=0.23623259620598341), high=Score(precision=0.32759840533334617, recall=0.21892206326297034, fmeasure=0.25450298555609574)),
 'rougeLsum': AggregateScore(

T5 (EPOCH 5, SWA (Stochastic Weight Averaging))

In [None]:
df = pd.read_csv("/content/Predictions_T5_5_SWA.csv")

In [None]:
metric.compute(predictions = df.preds.tolist(),references=df.actual.tolist())

{'rouge1': AggregateScore(low=Score(precision=0.42743105860063046, recall=0.26913376770816044, fmeasure=0.33208205581741773), mid=Score(precision=0.4553977108854025, recall=0.2936530721412022, fmeasure=0.35163382491497064), high=Score(precision=0.4858160638348559, recall=0.32157321090711183, fmeasure=0.37458863972996254)),
 'rouge2': AggregateScore(low=Score(precision=0.11671719907508529, recall=0.07487920686988073, fmeasure=0.09061918505637574), mid=Score(precision=0.14384684641949858, recall=0.09396966038337073, fmeasure=0.11218612095527998), high=Score(precision=0.17088031109003626, recall=0.11425425131450291, fmeasure=0.13236537485632066)),
 'rougeL': AggregateScore(low=Score(precision=0.25074407377493585, recall=0.15703721665058792, fmeasure=0.19315434409680732), mid=Score(precision=0.2708404923997352, recall=0.17570121167542557, fmeasure=0.21014983334891119), high=Score(precision=0.29143494907877315, recall=0.1951415912325562, fmeasure=0.2258556497554936)),
 'rougeLsum': Aggregat

T5(EPOCH 5 ,LR =5e-4)

In [None]:
df = pd.read_csv("/content/Predictions_T5_5_5e-4.csv")

In [None]:
metric.compute(predictions = df.preds.tolist(),references=df.actual.tolist())

{'rouge1': AggregateScore(low=Score(precision=0.45673921606631496, recall=0.28658059920007817, fmeasure=0.3527037724886849), mid=Score(precision=0.48718814853027914, recall=0.31109946358133966, fmeasure=0.3752302173720957), high=Score(precision=0.5137265417941873, recall=0.3317867779121553, fmeasure=0.3928223740251708)),
 'rouge2': AggregateScore(low=Score(precision=0.13456042401082516, recall=0.08516357735932743, fmeasure=0.10368824940253416), mid=Score(precision=0.1584198655667381, recall=0.1014952196192484, fmeasure=0.12286989252356749), high=Score(precision=0.18255176953787308, recall=0.11794421779796727, fmeasure=0.14026875023722402)),
 'rougeL': AggregateScore(low=Score(precision=0.2599808672886207, recall=0.16178695311284844, fmeasure=0.1998102870284882), mid=Score(precision=0.2780782270050828, recall=0.17873629844315658, fmeasure=0.21496881659254846), high=Score(precision=0.29627542350521263, recall=0.19645994661702038, fmeasure=0.2294537126187311)),
 'rougeLsum': AggregateScor

T5(Epoch 3, LR = 5e-4)

In [None]:
# Same /content/ prefix as the other prediction files (the leading slash was doubled).
df = pd.read_csv("/content/Predictions_T5_3_5e-4.csv")

In [None]:
metric.compute(predictions = df.preds.tolist(),references=df.actual.tolist())

{'rouge1': AggregateScore(low=Score(precision=0.4460893396702219, recall=0.28797903478612047, fmeasure=0.35140629368487364), mid=Score(precision=0.49025058142958056, recall=0.3148715397960282, fmeasure=0.3780338036991201), high=Score(precision=0.5266273896659619, recall=0.3426143433213742, fmeasure=0.40308288668605463)),
 'rouge2': AggregateScore(low=Score(precision=0.1366658800626159, recall=0.08668727863246513, fmeasure=0.10587417641906405), mid=Score(precision=0.16485325366102493, recall=0.10613776960291618, fmeasure=0.1273194040712594), high=Score(precision=0.19139232931654318, recall=0.12583690448187995, fmeasure=0.14797175875644353)),
 'rougeL': AggregateScore(low=Score(precision=0.28378451151266654, recall=0.1832237182115016, fmeasure=0.22335848846356934), mid=Score(precision=0.307487176228171, recall=0.19844833114948143, fmeasure=0.23771466482413225), high=Score(precision=0.33160708092402297, recall=0.21602500532279442, fmeasure=0.2511992230178269)),
 'rougeLsum': AggregateScor

In [None]:
import nltk
import numpy as np

def compute_metrics(eval_pred):
    """Score stored predictions against references with the global ROUGE `metric`.

    Args:
        eval_pred: object whose `preds` and `actual` columns expose `.tolist()`
            (e.g. a DataFrame read from one of the prediction CSV files).

    Returns:
        dict mapping every key returned by `metric.compute` to its mid
        F-measure, scaled to 0-100 and rounded to 4 decimals.
    """
    # NOTE(review): texts are scored as-is, without inserting a newline after
    # each sentence — confirm that is intended for rougeLsum.
    scores = metric.compute(
        predictions=eval_pred.preds.tolist(),
        references=eval_pred.actual.tolist(),
        use_stemmer=True,
    )

    # Keep only the mid-point F-measure of each aggregate, as a percentage.
    return {name: round(aggregate.mid.fmeasure * 100, 4) for name, aggregate in scores.items()}

In [None]:
df = pd.read_csv("/content/Predictions_T5_5_Sclip.csv")

In [None]:
compute_metrics(df)

{'rouge1': 41.5455, 'rouge2': 15.2441, 'rougeL': 24.8294, 'rougeLsum': 24.7878}