In [33]:
import json

import numpy as np
import torch
from torch.nn import functional as F
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Hugging Face checkpoint id; the tokenizer and the classifier are both
# loaded from it.
BEM_CHECKPOINT = "kortukov/answer-equivalence-bem"

tokenizer = AutoTokenizer.from_pretrained(BEM_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(BEM_CHECKPOINT)

# JSON paths for the two score variants. FILE_2 receives the adjusted scores
# later in this notebook; FILE_1 is presumably for the raw scores (confirm).
FILE_1 = '/kaggle/working/BEM_Score_version_1.json'
FILE_2 = '/kaggle/working/BEM_Score_version_2.json'

In [5]:
def tokenize_function(question, reference, candidate):
    """Encode one (question, reference, candidate) triple for the model.

    The two text segments are laid out as
    ``[CLS] candidate [SEP]`` and ``reference [SEP] question [SEP]``.

    Args:
        question: The question text.
        reference: The reference (ground-truth) answer text.
        candidate: The answer text being judged.

    Returns:
        The output of the module-level ``tokenizer`` for the two segments,
        requested as PyTorch tensors (``return_tensors='pt'``).
    """
    candidate_segment = f"[CLS] {candidate} [SEP]"
    reference_question_segment = f"{reference} [SEP] {question} [SEP]"

    encode_options = {
        # The special tokens are already written into the strings above, so
        # the tokenizer is told not to add its own.
        'add_special_tokens': False,
        'padding': 'max_length',
        'truncation': True,
        'return_tensors': 'pt',
    }
    return tokenizer(
        text=candidate_segment,
        text_pair=reference_question_segment,
        **encode_options,
    )


In [7]:
# Load the evaluation records. JSON text is UTF-8 by specification, so the
# encoding is given explicitly instead of relying on the platform default
# (which is not UTF-8 everywhere and would garble non-ASCII text).
with open('/new_data/evaluation.json', 'r', encoding='utf-8') as eval_file:
    data = json.load(eval_file)

In [12]:
# Score every example with the classifier. score[i] holds the softmax
# probability of class index 1 for example i.
# NOTE(review): class index 1 is presumably the "equivalent" label; confirm
# against the model card.
score = {}
model.eval()  # inference only: make sure dropout-style layers are disabled
# Gradients are never used here, so autograd is switched off; otherwise each
# forward pass records a graph, costing memory and time.
with torch.no_grad():
    for i in tqdm(range(len(data))):
        example = data[i]
        # Only the first ground truth is used as the reference answer.
        inputs = tokenize_function(example['question'], example['ground_truths'][0], example['answer'])
        out = model(**inputs)
        prediction = F.softmax(out.logits, dim=-1)
        score[i] = prediction[0][1].item()
    

100%|██████████| 4938/4938 [43:06<00:00,  1.91it/s]


In [40]:
# Mean of the raw per-example scores; the bare last line displays it.
val = np.array([*score.values()])
score_mean = np.mean(val)
score_mean

0.7413942305915852

In [36]:
# If the answer and the ground truth have an inclusive relationship (one
# contains the other, ignoring case and surrounding whitespace), the example's
# score becomes the maximum of the average score and its original score.
def _has_inclusive_relationship(ground_truth, answer):
    """Return True when one non-empty string contains the other, ignoring case."""
    gt = ground_truth.strip().lower()
    ans = answer.strip().lower()
    # An empty string is a substring of every string, so without this guard an
    # empty (or whitespace-only) answer or ground truth would always match and
    # be lifted to the average score.
    if not gt or not ans:
        return False
    return gt in ans or ans in gt


score_version_2 = {}
for i in tqdm(range(len(data))):
    if _has_inclusive_relationship(data[i]['ground_truths'][0], data[i]['answer']):
        score_version_2[i] = max(score_mean, score[i])
    else:
        score_version_2[i] = score[i]

100%|██████████| 4938/4938 [00:00<00:00, 431372.19it/s]


In [37]:
# Mean of the adjusted (version 2) scores; the bare last line displays it.
val_2 = np.array([*score_version_2.values()])
score_mean_2 = np.mean(val_2)
score_mean_2

0.7519599791498358

In [28]:
def Add_score_to_json(data, score, FILE):
    """Attach a score to every record and write all records to a JSON file.

    Args:
        data: Sequence of dicts. Each dict gets a ``'score'`` key; the dicts
            are modified in place.
        score: Scores indexable by position ``0 .. len(data) - 1`` (for
            example a list, a NumPy array, or a dict keyed by index).
        FILE: Path of the JSON file to write; an existing file is overwritten.

    Raises:
        IndexError / KeyError: If ``score`` has no entry for some position.
    """
    for i in range(len(data)):
        # Store a built-in float: NumPy scalars other than float64 (e.g.
        # float32) are not JSON serializable.
        data[i]['score'] = float(score[i])
    # ensure_ascii=False writes non-ASCII characters verbatim, so the file
    # encoding must be UTF-8 rather than the platform default.
    with open(FILE, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)

In [39]:
# Write the records with the adjusted (version 2) scores to FILE_2.
Add_score_to_json(data=data, score=val_2, FILE=FILE_2)

4938