In [1]:
!pip install evaluate rouge-score > /dev/null 2>&1;

In [2]:
from datasets import load_dataset
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel
import evaluate
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import r2_score
from tqdm import tqdm
from transformers import BartForConditionalGeneration, BartTokenizer
import numpy as np

In [3]:
rouge=evaluate.load("rouge")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

device(type='cuda')

In [4]:
answersumm = load_dataset("alexfabbri/answersumm")

README.md:   0%|          | 0.00/9.74k [00:00<?, ?B/s]

train.jsonl:   0%|          | 0.00/24.8M [00:00<?, ?B/s]

validation.jsonl:   0%|          | 0.00/4.43M [00:00<?, ?B/s]

test.jsonl:   0%|          | 0.00/8.76M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2783 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [5]:
train_data=answersumm['train']
query_dataset=[]
sentence_dataset=[]
label_dataset=[]
summary_dataset=[]
for sample in train_data:
    ref_summ=sample['summaries'][0][1]
    query=sample['question']['question']
    for ans in sample['answers']:
        for sent in ans['sents']:
            clust_id=sent['cluster_id'][0][0]
            label_dataset.append(0 if clust_id==-1 else 1)
            sentence_dataset.append(sent['text'])
            query_dataset.append(query)
            summary_dataset.append(ref_summ)

In [5]:
def compute_rouge_score(sent,ref_summary):
    results=rouge.compute(predictions=[sent], references=[ref_summary])
    return results

In [6]:
class RelRegDataset(Dataset):
    def __init__(self, tokenizer, max_len, queries, sentences, labels, summaries):
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.queries=queries
        self.labels=labels
        self.sentences=sentences
        self.summaries=summaries

    def __len__(self):
        return len(self.queries)

    def __getitem__(self, idx):
        sentence = self.sentences[idx]
        query = self.queries[idx]
        target = self.labels[idx]
        
        inputs = self.tokenizer(query, sentence, 
                                max_length=self.max_len, 
                                padding="max_length", 
                                truncation=True, 
                                return_tensors="pt")
        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'token_type_ids': inputs['token_type_ids'].squeeze(0),
            'targets': torch.tensor(target, dtype=torch.float),
            'summ': self.summaries[idx],
        }

In [7]:
class RelRegModel(nn.Module):
    def __init__(self, model_name="bert-base-uncased"):
        super(RelRegModel, self).__init__()
        self.encoder = BertModel.from_pretrained(model_name)
        self.regressor = nn.Linear(self.encoder.config.hidden_size, 1)
    
    def forward(self, input_ids, attention_mask, token_type_ids):
        outputs = self.encoder(input_ids=input_ids, 
                               attention_mask=attention_mask, 
                               token_type_ids=token_type_ids)
        pooled_output = outputs.pooler_output
        score = torch.sigmoid(self.regressor(pooled_output))
        return score

In [8]:
model=RelRegModel()
model=model.to(device)
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.BCELoss()
epochs=5

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [13]:
train_dataset=RelRegDataset(tokenizer,512,query_dataset[:30000],sentence_dataset[:30000],label_dataset[:30000],summary_dataset[:30000])
train_dataloader = DataLoader(train_dataset, batch_size=40, shuffle=True)

In [14]:
model.train()
for epoch in range(epochs):
    total_loss=0
    for batch in tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{epochs}"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch['token_type_ids'].to(device)
        targets = batch['targets'].to(device)
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask, token_type_ids)
        loss = criterion(outputs.squeeze(-1), targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    train_loss=total_loss/len(train_dataloader)
    print(f"Epoch {epoch+1}, Loss: {train_loss:.4f}")

Epoch 1/5:  30%|███       | 227/750 [08:19<19:12,  2.20s/it]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Epoch 1/5: 100%|██████████| 750/750 [27:29<00:00,  2.20s/it]


Epoch 1, Loss: 0.3924


Epoch 2/5:  66%|██████▋   | 497/750 [18:13<09:15,  2.19s/it]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Epoch 2/5: 100%|██████████| 750/750 [27:28<00:00,  2.20s/it]


Epoch 2, Loss: 0.2779


Epoch 3/5:  71%|███████▏  | 535/750 [19:37<07:52,  2.20s/it]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Epoch 3/5: 100%|██████████| 750/750 [27:30<00:00,  2.20s/it]


Epoch 3, Loss: 0.1538


Epoch 4/5:  54%|█████▍    | 407/750 [14:55<12:35,  2.20s/it]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Epoch 4/5: 100%|██████████| 750/750 [27:30<00:00,  2.20s/it]


Epoch 4, Loss: 0.0749


Epoch 5/5:  18%|█▊        | 137/750 [05:01<22:32,  2.21s/it]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Epoch 5/5: 100%|██████████| 750/750 [27:30<00:00,  2.20s/it]

Epoch 5, Loss: 0.0474





In [19]:
torch.save(model, "RelRegRel_30k.pth")

In [9]:
model=torch.load('../input/anlp-relreg10k/RelRegRel_10k.pth')

  model=torch.load('../input/anlp-relreg10k/RelRegRel_10k.pth')


In [10]:
model_name = "facebook/bart-large"
tokenizer2 = BartTokenizer.from_pretrained(model_name)
# model2 = BartForConditionalGeneration.from_pretrained(model_name).to(device)
model2=torch.load('../input/bart-ft2/BART_FT2.pth')
model2=model2.to(device)

def generate_summary(input_text):
    inputs = tokenizer2(input_text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model2.generate(inputs["input_ids"].to(device), max_length=256, min_length=10, length_penalty=2.0, num_beams=4)
    summary = tokenizer2.decode(summary_ids[0], skip_special_tokens=True)
    return summary

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

  model2=torch.load('../input/bart-ft2/BART_FT2.pth')


In [11]:
model.eval()
curr_data=answersumm['test']
scores_5=[]
scores_10=[]
scores_15=[]
for sample in tqdm(curr_data):
    ref_summ=sample['summaries'][0][1]
    query=sample['question']['question']
    sents=[]
    temp_scores=[]
    for ans in sample['answers']:
        for sent in ans['sents']:
            inputs = tokenizer(query, sent['text'], 
                            max_length=512, 
                            padding="max_length", 
                            truncation=True, 
                            return_tensors="pt")
            curr={
                'input_ids': inputs['input_ids'],
                'attention_mask': inputs['attention_mask'],
                'token_type_ids': inputs['token_type_ids'],
            }

            input_ids = curr['input_ids'].to(device)
            attention_mask = curr['attention_mask'].to(device)
            token_type_ids = curr['token_type_ids'].to(device)
            outputs = model(input_ids, attention_mask, token_type_ids)
            if outputs.item()>=0.5:
                temp_scores.append(outputs.item())
                sents.append(sent['text'])

    sorted_strings = [string for _, string in sorted(zip(temp_scores, sents), reverse=True)]
    inp_str=""
    for s in sorted_strings[:5]:
        inp_str+=s
    scores_5.append(compute_rouge_score(generate_summary(inp_str),ref_summ))
    inp_str=""
    for s in sorted_strings[:10]:
        inp_str+=s
    scores_10.append(compute_rouge_score(generate_summary(inp_str),ref_summ))
    inp_str=""
    for s in sorted_strings[:15]:
        inp_str+=s
    scores_15.append(compute_rouge_score(generate_summary(inp_str),ref_summ))

100%|██████████| 1000/1000 [40:50<00:00,  2.45s/it]


In [12]:
from collections import defaultdict

sum_dict = defaultdict(int)
count_dict = defaultdict(int)

# Iterate through the list of dictionaries
for d in scores_5:
    for key, value in d.items():
        sum_dict[key] += value
        count_dict[key] += 1

# Calculate the average for each key
average_dict = {key: sum_val / count_dict[key] for key, sum_val in sum_dict.items()}
average_dict

{'rouge1': 0.2147745246976527,
 'rouge2': 0.05856866825743714,
 'rougeL': 0.16374635433129672,
 'rougeLsum': 0.16387968766463004}

In [13]:
sum_dict = defaultdict(int)
count_dict = defaultdict(int)

# Iterate through the list of dictionaries
for d in scores_10:
    for key, value in d.items():
        sum_dict[key] += value
        count_dict[key] += 1

# Calculate the average for each key
average_dict = {key: sum_val / count_dict[key] for key, sum_val in sum_dict.items()}
average_dict

{'rouge1': 0.22294463049895938,
 'rouge2': 0.061920629106610316,
 'rougeL': 0.1680199544942722,
 'rougeLsum': 0.16817390638430657}

In [14]:
sum_dict = defaultdict(int)
count_dict = defaultdict(int)

# Iterate through the list of dictionaries
for d in scores_15:
    for key, value in d.items():
        sum_dict[key] += value
        count_dict[key] += 1

# Calculate the average for each key
average_dict = {key: sum_val / count_dict[key] for key, sum_val in sum_dict.items()}
average_dict

{'rouge1': 0.2239469318831701,
 'rouge2': 0.06326262523183113,
 'rougeL': 0.17020881118329514,
 'rougeLsum': 0.1704042134821457}