# Import

In [18]:
import os
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np
# Re-create the `np.object` attribute on the NumPy module as an alias of the builtin `object`.
# NOTE(review): presumably a compatibility shim for a dependency that still references
# `np.object` on a NumPy build that no longer provides it — confirm which library needs it.
np.object = object 

In [19]:
# arguments
input_max_len = 512  # `max_length` passed to the tokenizer when encoding each example

batch_size = 4  # number of examples per DataLoader batch

# Supported layouts for combining question / options / article into one input text;
# `concat_option_id` is the index of the layout in use.
concat_options = [
    "without-options",
    "options-in-between",
    "options-at-end",
]
concat_option_id = 0

bert_model = 'bert-large-uncased' # or 'bert-base-uncased'

# data preparation

In [20]:
from sklearn.model_selection import train_test_split
from datasets import Dataset
import torch
from torch.utils.data import DataLoader
import json

In [21]:
def concat_text(question, article, options, tag=None):
    """Build the single text sequence that is later passed to the tokenizer.

    Every ``@placeholder`` in ``question`` is replaced by ``[MASK]`` and the
    pieces are joined with ``' [SEP] '``.

    Args:
        question: Cloze question containing ``@placeholder``.
        article: Passage text.
        options: Iterable of option strings; only used for ``tag`` 1 and 2,
            where they are joined with single spaces.
        tag: Layout selector. ``None`` (the default) means "use the
            module-level ``concat_option_id`` as it is *at call time*", so
            re-running the configuration cell takes effect without having to
            re-run this definition.
            0 -> question [SEP] article
            1 -> question [SEP] options [SEP] article
            2 -> question [SEP] article [SEP] options

    Returns:
        The concatenated string.

    Raises:
        ValueError: If ``tag`` is not 0, 1 or 2 (previously this silently
            returned ``None``).
    """
    if tag is None:
        tag = concat_option_id

    masked_question = question.replace("@placeholder", '[MASK]')

    if tag == 0:
        parts = [masked_question, article]  # 76.4 82.8; 77.3 81.1; 74.5 80.1
    elif tag == 1:
        parts = [masked_question, ' '.join(options), article]  # 76 82.2; 69.6 69; 66.9 69.7
    elif tag == 2:
        parts = [masked_question, article, ' '.join(options)]  # 76 83.1; 71.5 73.5; 68.6 77.1
    else:
        raise ValueError(f"unknown concat tag {tag!r}; expected 0, 1 or 2")

    return ' [SEP] '.join(parts)

In [22]:
def read_examples(input_file):
    """Read a JSON-lines file into a list of example dicts.

    Each non-blank line is parsed as one JSON object. The keys ``article``,
    ``question``, ``label`` and ``option_0`` .. ``option_4`` are read, each
    defaulting to ``''`` when absent.

    Args:
        input_file: Path of the UTF-8 encoded JSON-lines file.

    Returns:
        A list of dicts with keys ``"text"`` (output of ``concat_text``),
        ``"options"`` (list of the five option strings) and ``"label"``
        (the label converted with ``int``).

    Raises:
        ValueError: If a record's label cannot be converted by ``int`` (this
            includes a missing label, which defaults to ``''``), or if a
            non-blank line is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    examples = []
    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at the end of the file)
                # are not records; json.loads('') would raise on them.
                continue
            record = json.loads(line)
            options = [record.get(f'option_{i}', '') for i in range(5)]
            examples.append({
                "text": concat_text(record.get('question', ''), record.get('article', ''), options),
                "options": options,
                "label": int(record.get('label', '')),
            })
    return examples

In [23]:
# JSON-lines files that are loaded with `read_examples`, one per task.
# NOTE(review): the directory is spelled 'trail_data' (possibly meant 'trial_data') — confirm it
# matches the dataset on disk before renaming anything.
task_1_test_data_path = '../input/semevaldataset/trail_data/Task_1_Imperceptibility.jsonl'
task_2_test_data_path = '../input/semevaldataset/trail_data/Task_2_Nonspecificity.jsonl'

In [24]:
# Parse each task's file into example dicts, then go through a pandas DataFrame
# to build the `Dataset` objects (task 1 first, then task 2).
task_1_test_data, task_2_test_data = (
    Dataset.from_pandas(pd.DataFrame(read_examples(jsonl_path)))
    for jsonl_path in (task_1_test_data_path, task_2_test_data_path)
)

  if _pandas_api.is_sparse(col):


In [25]:
from transformers import BertTokenizer, BertForMaskedLM

In [27]:
# Load the tokenizer for the checkpoint named by `bert_model`; `get_feature` and `eval`
# read this module-level `tokenizer`.
tokenizer = BertTokenizer.from_pretrained(bert_model)  

In [28]:
def get_feature(example):
    """Tokenize one example and attach the fields used during evaluation.

    Reads ``example['text']``, ``example['options']`` and ``example['label']``
    and writes the following keys back onto ``example`` before returning it:

    * ``input_ids`` / ``attention_mask``: encoding of ``text``, truncated and
      padded to ``input_max_len``.
    * ``labels``: token ids of the correct option, limited to one token.
    * ``options``: for every option, its first token id (this replaces the
      original list of option strings).
    * ``answer``: the label index wrapped in a one-element list.

    Because every option is encoded with ``max_length=1`` and truncation, only
    its first token id is kept.
    """
    def single_token_ids(word):
        # No special tokens, at most one token id for `word`.
        return tokenizer(
            word,
            add_special_tokens=False,
            return_attention_mask=False,
            return_token_type_ids=False,
            max_length=1,
            truncation=True,
        ).input_ids

    encoded = tokenizer(
        example['text'],
        max_length=input_max_len,
        truncation=True,
        padding='max_length',
        return_attention_mask=True,
    )

    answer_index = example['label']
    gold_ids = single_token_ids(example['options'][answer_index])
    option_ids = [single_token_ids(word)[0] for word in example['options']]

    example["input_ids"] = encoded.input_ids
    example["attention_mask"] = encoded.attention_mask
    example["labels"] = gold_ids
    example["options"] = option_ids
    example["answer"] = [answer_index]

    return example

In [29]:
# Raw columns passed as `remove_columns` to `Dataset.map`.
remove_columns = ["text", "label"]
# Columns passed to `set_format(type='torch', ...)`.
columns = [
    "input_ids",
    "attention_mask",
    "labels",
    "options",
    "answer",
]

In [30]:
# Apply `get_feature` to every example of each test set; the columns named in
# `remove_columns` are dropped from the mapped datasets.
task_1_test_tokenize = task_1_test_data.map(get_feature, remove_columns=remove_columns)
task_2_test_tokenize = task_2_test_data.map(get_feature, remove_columns=remove_columns)

  0%|          | 0/1000 [00:00<?, ?ex/s]

  0%|          | 0/1000 [00:00<?, ?ex/s]

In [31]:
# Configure both tokenized datasets to return the columns listed in `columns` as torch tensors.
task_1_test_tokenize.set_format(type='torch', columns=columns)
task_2_test_tokenize.set_format(type='torch', columns=columns)
# Bare expression: shows the dataset summary as the cell output.
task_1_test_tokenize

Dataset({
    features: ['options', 'input_ids', 'attention_mask', 'labels', 'answer'],
    num_rows: 1000
})

In [32]:
# Wrap each tokenized test set in a DataLoader that yields batches of `batch_size` examples;
# these loaders are what `eval` iterates over.
task_1_test_dataloader = DataLoader(task_1_test_tokenize, batch_size=batch_size)
task_2_test_dataloader = DataLoader(task_2_test_tokenize, batch_size=batch_size)

# Evaluation

In [33]:
def eval(model, test_loader):
    """Score a masked-LM on a multiple-choice cloze loader and print the accuracy.

    For each example, the logits at the ``[MASK]`` position are restricted to
    the token ids of the candidate options. The best-scoring option index is
    the prediction and is compared with ``batch['answer']``. For the first
    batch only, the expected and predicted tokens of every example are printed.

    Note: this name shadows Python's built-in ``eval`` in the notebook
    namespace; it is kept so existing calls continue to work.

    Args:
        model: Model whose forward call accepts ``input_ids`` and
            ``attention_mask`` and returns an object exposing ``.logits``.
        test_loader: Iterable of batches providing the tensors ``input_ids``,
            ``attention_mask``, ``labels``, ``options`` and ``answer``.

    Returns:
        None. The accuracy and a separator line are printed. For an empty
        loader the accuracy is reported as ``nan`` instead of raising
        ``ZeroDivisionError``.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model = model.to(device)
    model.eval()

    total = 0
    correct = 0

    with torch.no_grad():
        for batch_idx, batch in enumerate(test_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            options = batch['options'].to(device)

            logits = model(input_ids, attention_mask=attention_mask).logits

            # Column index of the [MASK] token in each row.
            # Assumes exactly one [MASK] per example; otherwise the row indexing below fails.
            mask_token_indexes = torch.where(input_ids == tokenizer.mask_token_id)[1]
            row_indexes = torch.arange(logits.shape[0], device=device)
            # shape: [batch_size, vocab_size]
            mask_token_logits = logits[row_indexes, mask_token_indexes, :]

            # option_logits[b, j] = mask_token_logits[b, options[b, j]]
            option_logits = torch.gather(mask_token_logits, 1, options)
            # shape: [batch_size, 1] -- index of the best-scoring option per example
            top_option_indexes = torch.topk(option_logits, 1).indices

            answers = [answer.item() for answer in batch['answer']]
            outputs = top_option_indexes.squeeze(-1).tolist()

            if batch_idx == 0:
                # Decoding is only needed for this preview of the first batch.
                gt = tokenizer.batch_decode(batch['labels'], skip_special_tokens=True)
                top_tokens = torch.gather(options, 1, top_option_indexes).tolist()
                preds = tokenizer.batch_decode(top_tokens, skip_special_tokens=True)
                for row in range(input_ids.shape[0]):
                    print(f'expected: {gt[row]}({answers[row]}), output: {preds[row]}({outputs[row]})')
                    print('\n')

            total += len(answers)
            correct += sum(answer == output for answer, output in zip(answers, outputs))

    accuracy = correct / total if total else float('nan')
    print("Accuracy: ", accuracy)
    print("\n=================================================\n")

## Evaluation without fine-tuning

In [35]:
# Baseline: load the `bert_model` checkpoint as-is and score it on both test loaders.
model = BertForMaskedLM.from_pretrained(bert_model)
print("Task1: ")
eval(model, task_1_test_dataloader)
print("Task2: ")
eval(model, task_2_test_dataloader)

Downloading model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Task1: 
expected: lost(2), output: lost(2)


expected: reputation(4), output: reputation(4)


expected: potentially(0), output: potentially(0)


expected: classic(2), output: classic(2)


Accuracy:  0.788


Task2: 
expected: tough(3), output: condition(0)


expected: district(1), output: district(1)


expected: team(2), output: team(2)


expected: hand(1), output: hand(1)


Accuracy:  0.859




## Evaluation of the saved checkpoints: Task 1, Task 1 cross (on Task 2 data), Task 2, Task 2 cross (on Task 1 data)

In [36]:
# Load 'task_1_checkpoint' from the directory of the selected concat option and score it on
# the Task 1 loader, then on the Task 2 loader as the "cross" run.
# NOTE(review): presumably this checkpoint was fine-tuned on Task 1 data — confirm.
model = BertForMaskedLM.from_pretrained('../input/semevalmodelbert/' + concat_options[concat_option_id] + '/task_1_checkpoint')
print("Task1: ")
eval(model, task_1_test_dataloader)
print("Task1 Cross: ")
eval(model, task_2_test_dataloader)

# Same procedure for 'task_2_checkpoint': Task 2 loader first, then Task 1 as the "cross" run.
model = BertForMaskedLM.from_pretrained('../input/semevalmodelbert/' + concat_options[concat_option_id] + '/task_2_checkpoint')
print("Task2: ")
eval(model, task_2_test_dataloader)
print("Task2 Cross: ")
eval(model, task_1_test_dataloader)