In [1]:
!pip install evaluate rouge-score > /dev/null 2>&1;

In [2]:
from datasets import load_dataset
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel
import evaluate
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import r2_score
from tqdm import tqdm

In [3]:
rouge=evaluate.load("rouge")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

device(type='cuda')

In [4]:
answersumm = load_dataset("alexfabbri/answersumm")

README.md:   0%|          | 0.00/9.74k [00:00<?, ?B/s]

train.jsonl:   0%|          | 0.00/24.8M [00:00<?, ?B/s]

validation.jsonl:   0%|          | 0.00/4.43M [00:00<?, ?B/s]

test.jsonl:   0%|          | 0.00/8.76M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2783 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [5]:
train_data=answersumm['train']
query_dataset=[]
ref_summ_dataset=[]
sentence_dataset=[]
for sample in train_data:
    ref_summ=sample['summaries'][0][1]
    query=sample['question']['question']
    for ans in sample['answers']:
        for sent in ans['sents']:
            sentence_dataset.append(sent['text'])
            ref_summ_dataset.append(ref_summ)
            query_dataset.append(query)

In [5]:
def compute_rouge_score(sent,ref_summary):
    results=rouge.compute(predictions=[sent], references=[ref_summary])
    return results

In [6]:
class RelRegDataset(Dataset):
    def __init__(self, tokenizer, max_len, queries, sentences, summaries):
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.queries=queries
        self.summaries=summaries
        self.sentences=sentences

    def __len__(self):
        return len(self.queries)

    def __getitem__(self, idx):
        sentence = self.sentences[idx]
        query = self.queries[idx]
        summary = self.summaries[idx]
        
        target = compute_rouge_score(sentence,summary)
        
        inputs = self.tokenizer(query, sentence, 
                                max_length=self.max_len, 
                                padding="max_length", 
                                truncation=True, 
                                return_tensors="pt")
        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'token_type_ids': inputs['token_type_ids'].squeeze(0),
            'targets': torch.tensor(target, dtype=torch.float)
        }

In [7]:
class RelRegModel(nn.Module):
    def __init__(self, model_name="bert-base-uncased"):
        super(RelRegModel, self).__init__()
        self.encoder = BertModel.from_pretrained(model_name)
        self.regressor = nn.Linear(self.encoder.config.hidden_size, 1)
    
    def forward(self, input_ids, attention_mask, token_type_ids):
        outputs = self.encoder(input_ids=input_ids, 
                               attention_mask=attention_mask, 
                               token_type_ids=token_type_ids)
        pooled_output = outputs.pooler_output
        score = self.regressor(pooled_output)
        return score

In [None]:
model=RelRegModel()
model=model.to(device)
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.MSELoss()
epochs=5

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [10]:
train_dataset=RelRegDataset(tokenizer,512,query_dataset[:3200],sentence_dataset[:3200],ref_summ_dataset[:3200])
train_dataloader = DataLoader(train_dataset, batch_size=40, shuffle=True)

In [11]:
model.train()
for epoch in range(1,epochs+1):
    total_loss=0
    for i,batch in enumerate(train_dataloader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch['token_type_ids'].to(device)
        targets = batch['targets'].to(device)
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask, token_type_ids)
        loss = criterion(outputs.squeeze(-1), targets)
        loss.backward()
        optimizer.step()
        if (i+1)%100==0:
            print(f"Epoch: {epoch}, Batch: {i+1} | {len(train_dataloader)}, Loss: {loss.item()}")
        total_loss += loss.item()
    train_loss=total_loss/len(train_dataloader)
    print()
    print(f"Epoch {epoch}, Loss: {train_loss:.4f}")
    print()


Epoch 1, Loss: 0.0190


Epoch 2, Loss: 0.0029


Epoch 3, Loss: 0.0027


Epoch 4, Loss: 0.0025


Epoch 5, Loss: 0.0024



In [12]:
torch.save(model, "RelReg2.pth")

In [9]:
model=torch.load('../input/anlp-relreg/RelReg.pth')

  model=torch.load('../input/anlp-relreg/RelReg.pth')


In [10]:
from transformers import BartForConditionalGeneration, BartTokenizer

In [None]:
model_name = "facebook/bart-large"
tokenizer2 = BartTokenizer.from_pretrained(model_name)
model2=torch.load('../input/bart-ft2/BART_FT2.pth')

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

  model2=torch.load('../input/bart-ft2/BART_FT2.pth')


In [14]:
def generate_summary(input_text):
    inputs = tokenizer2(input_text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model2.generate(inputs["input_ids"].to(device), max_length=256, min_length=10, length_penalty=2.0, num_beams=4)
    summary = tokenizer2.decode(summary_ids[0], skip_special_tokens=True)
    return summary

In [20]:
curr_data=answersumm['test']
scores_5=[]
scores_10=[]
scores_15=[]
for sample in tqdm(curr_data):
    ref_summ=sample['summaries'][0][1]
    query=sample['question']['question']
    sents=[]
    temp_scores=[]
    for ans in sample['answers']:
        for sent in ans['sents']:
            inputs = tokenizer(query, sent['text'], 
                            max_length=512, 
                            padding="max_length", 
                            truncation=True, 
                            return_tensors="pt")
            sents.append(sent['text'])
            curr={
                'input_ids': inputs['input_ids'],
                'attention_mask': inputs['attention_mask'],
                'token_type_ids': inputs['token_type_ids'],
            }

            input_ids = curr['input_ids'].to(device)
            attention_mask = curr['attention_mask'].to(device)
            token_type_ids = curr['token_type_ids'].to(device)
            outputs = model(input_ids, attention_mask, token_type_ids)
            temp_scores.extend(outputs.detach().cpu().numpy().flatten().tolist())

    sorted_strings = [string for _, string in sorted(zip(temp_scores, sents), reverse=True)]
    inp_str=""
    for s in sorted_strings[:5]:
        inp_str+=s
    scores_5.append(compute_rouge_score(generate_summary(inp_str),ref_summ))
    inp_str=""
    for s in sorted_strings[:10]:
        inp_str+=s
    scores_10.append(compute_rouge_score(generate_summary(inp_str),ref_summ))
    inp_str=""
    for s in sorted_strings[:15]:
        inp_str+=s
    scores_15.append(compute_rouge_score(generate_summary(inp_str),ref_summ))

100%|██████████| 1000/1000 [45:10<00:00,  2.71s/it]


In [None]:
from collections import defaultdict

sum_dict = defaultdict(int)
count_dict = defaultdict(int)
for d in scores_5:
    for key, value in d.items():
        sum_dict[key] += value
        count_dict[key] += 1
average_dict = {key: sum_val / count_dict[key] for key, sum_val in sum_dict.items()}
average_dict

{'rouge1': 0.21649382091736627,
 'rouge2': 0.05604541589640728,
 'rougeL': 0.1628605574126917,
 'rougeLsum': 0.16291938094210345}

In [None]:
sum_dict = defaultdict(int)
count_dict = defaultdict(int)
for d in scores_10:
    for key, value in d.items():
        sum_dict[key] += value
        count_dict[key] += 1
average_dict = {key: sum_val / count_dict[key] for key, sum_val in sum_dict.items()}
average_dict

{'rouge1': 0.22813088561609593,
 'rouge2': 0.06198112848945814,
 'rougeL': 0.17034762589288438,
 'rougeLsum': 0.17039890794416643}

In [None]:
sum_dict = defaultdict(int)
count_dict = defaultdict(int)
for d in scores_15:
    for key, value in d.items():
        sum_dict[key] += value
        count_dict[key] += 1
average_dict = {key: sum_val / count_dict[key] for key, sum_val in sum_dict.items()}
average_dict

{'rouge1': 0.23441618766905273,
 'rouge2': 0.06666096561070811,
 'rougeL': 0.1762783625629964,
 'rougeLsum': 0.17630557344735012}