In [None]:
import os
import json

import torch
import tqdm

from transformers import T5Tokenizer, T5ForConditionalGeneration

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
train_data = json.load(open("/content/drive/MyDrive/Task-2-SemEval-2024-main/Task-2-SemEval-2024-main/training_data/train.json"))
test_data = json.load(open("/content/drive/MyDrive/Task-2-SemEval-2024-main/Task-2-SemEval-2024-main/test.json"))
files = os.listdir("/content/drive/MyDrive/Task-2-SemEval-2024-main/Task-2-SemEval-2024-main/training_data/CT json")
files.remove(".DS_Store")

In [None]:
#Create a dictionary where keys are derived from filenames
files_data = {file[:-5]:json.load(open(f"/content/drive/MyDrive/Task-2-SemEval-2024-main/Task-2-SemEval-2024-main/training_data/CT json/{file}")) for file in files}

In [None]:
# Define the training function
def train(model, train_data, tokenizer, device):
    model.train()
    total_loss = 0
    for batch in train_data:
        inputs = tokenizer(batch['premise'], batch['hypothesis'], truncation=True, padding=True, return_tensors='pt').to(device)
        outputs = model(**inputs, labels=inputs['input_ids']).loss
        total_loss += outputs.item()
    return total_loss / len(train_data)

In [None]:
# Define the evaluation function
def evaluate(model, test_data, tokenizer, device):
    model.eval()
    predictions = []
    for batch in test_data:
        inputs = tokenizer(batch['premise'], batch['hypothesis'], truncation=True, padding=True, return_tensors='pt').to(device)
        outputs = model.generate(inputs['input_ids'], num_beams=4, max_length=15, early_stopping=True)
        predictions.append({'id': batch['id'], 'prediction': tokenizer.decode(outputs[0], skip_special_tokens=True)})
    return predictions

In [None]:
# Define the evaluation function
data_expanded = []
for _id, value in test_data.items():
    temp = {}
    temp["id"] = _id
    p_nctid = value["Primary_id"]     #extract values from data
    s_nctid = value.get("Secondary_id")
    section_id = value["Section_id"]
    statement = value["Statement"]
    primary_evidence = files_data[p_nctid][section_id]
    temp["statement"] = statement
    temp["primary_evidence"] = primary_evidence
    # temp["label"] = value["Label"]

    if s_nctid is not None:
        secondary_evidence = files_data[s_nctid][section_id]
        temp["secondary_evidence"] = secondary_evidence

    data_expanded.append(temp)

In [None]:
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
model = AutoModelForMaskedLM.from_pretrained("google-bert/bert-base-uncased")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at google-bert/bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
def get_input_text(premise, hypothesis):
    options_prefix = "OPTIONS:\n- "
    separator = "\n- "
    options_ = options_prefix + f"{separator}".join(["Entailment","Contradiction"])
    return f"{premise} \n Question: Does this imply that {hypothesis}? {options_}"

In [None]:
def get_input_text(premise, hypothesis):
    options_prefix = "OPTIONS:\n- "
    separator = "\n- "
    options_ = options_prefix + f"{separator}".join(["Entailment","Contradiction"])
    return f"{premise} \n Question: Does this imply that {hypothesis}? {options_}"

samples = []
for sample in data_expanded:
    primary_evidence = "".join(sample['primary_evidence'])
    sentence = f"Primary trial evidence are {primary_evidence}"
    secondary_evidence = sample.get("secondary_evidence")
    if secondary_evidence:
        secondary_evidence = "".join(sample['secondary_evidence'])
        sentence = f"{sentence} and Secondary trial evidence are {secondary_evidence}"
    input_text = get_input_text(sentence, sample['statement'])
    #temp = {"text":input_text, "Label":sample['Label']}
    temp = {"text":input_text,"label":get_input_text}
    samples.append(temp)


In [None]:
import torch
import tqdm

# Move model to GPU
model.to("cuda")

# Set model to evaluation mode
model.eval()

labels = []
pred = []

# Define the maximum sequence length
desired_max_length = 512  # Adjust this value according to your requirements

# Disable gradient calculations
with torch.no_grad():
    for sample in tqdm.tqdm(samples):
        labels.append(sample["label"])
        input_ids = tokenizer(sample["text"], return_tensors="pt").input_ids.to("cuda")
        outputs = model.generate(input_ids, max_new_tokens=desired_max_new_tokens)

        pred.append(tokenizer.decode(outputs[0]))

# Ensure model is back in evaluation mode if you plan to continue training or fine-tuning
model.train()


In [None]:
pred

['[CLS] Primary trial evidence are Adverse Events 1 : Total : 0 / 733 ( 0. 00 % ) Adverse Events 2 : Total : 0 / 765 ( 0. 00 % ) and Secondary trial evidence are Adverse Events 1 : Total : 0 / 88 ( 0. 00 % ) Adverse Events 2 : Total : 0 / 86 ( 0. 00 % ) Question : Does this imply that the record shows 5 total cases of asthenia and 3 total cases of pyrexia in the patients of the primary and secondary clinical trials.? OPTIONS : - Entailment - Contradiction [SEP]..................................................................................................................................................................................................................................................................................................................................................................................................']

In [None]:
pred = [p[5:][:-4].strip() for p in pred]

In [None]:
set(pred)
from collections import Counter
Counter(pred)

Counter({'Primary trial evidence are Adverse Events 1 : Total : 0 / 733 ( 0. 00 % ) Adverse Events 2 : Total : 0 / 765 ( 0. 00 % ) and Secondary trial evidence are Adverse Events 1 : Total : 0 / 88 ( 0. 00 % ) Adverse Events 2 : Total : 0 / 86 ( 0. 00 % ) Question : Does this imply that the record shows 5 total cases of asthenia and 3 total cases of pyrexia in the patients of the primary and secondary clinical trials.? OPTIONS : - Entailment - Contradiction [SEP]..............................................': 1})

In [None]:
prediction_dict = {}
for _id,pred_x in zip(test_data, pred):
    prediction_dict[str(_id)] = {"Prediction":pred_x}

In [None]:
from sklearn.metrics import f1_score
uuid_list = list(prediction_dict.keys())
results_pred = []
gold_labels = []
for i in range(len(uuid_list)):
     if prediction_dict[uuid_list[i]]["Prediction"] in ["Entailment", "Yes"]:
         results_pred.append(1)
     else:
         results_pred.append(0)
     if test_data[uuid_list[i]]["Type"] in ["Entailment", "No"]:
         gold_labels.append(1)
     else:
         gold_labels.append(0)
f1_score(gold_labels,results_pred)

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


0.0

In [None]:
prediction_dict
json.dump(prediction_dict, open("biobertuncased.json", 'w'), indent=4)
