In [None]:
import os
import json

import torch
import tqdm

from transformers import T5Tokenizer, T5ForConditionalGeneration

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
train_data = json.load(open("/content/drive/MyDrive/Task-2-SemEval-2024-main/Task-2-SemEval-2024-main/training_data/train.json"))
test_data = json.load(open("/content/drive/MyDrive/Task-2-SemEval-2024-main/Task-2-SemEval-2024-main/test.json"))
files = os.listdir("/content/drive/MyDrive/Task-2-SemEval-2024-main/Task-2-SemEval-2024-main/training_data/CT json")
files.remove(".DS_Store")

In [None]:
#Create a dictionary where keys are derived from filenames
files_data = {file[:-5]:json.load(open(f"/content/drive/MyDrive/Task-2-SemEval-2024-main/Task-2-SemEval-2024-main/training_data/CT json/{file}")) for file in files}

In [None]:
# Define the training function
def train(model, train_data, tokenizer, device):
    model.train()
    total_loss = 0
    for batch in train_data:
        inputs = tokenizer(batch['premise'], batch['hypothesis'], truncation=True, padding=True, return_tensors='pt').to(device)
        outputs = model(**inputs, labels=inputs['input_ids']).loss
        total_loss += outputs.item()
    return total_loss / len(train_data)

In [None]:
# Define the evaluation function
def evaluate(model, test_data, tokenizer, device):
    model.eval()
    predictions = []
    for batch in test_data:
        inputs = tokenizer(batch['premise'], batch['hypothesis'], truncation=True, padding=True, return_tensors='pt').to(device)
        outputs = model.generate(inputs['input_ids'], num_beams=4, max_length=15, early_stopping=True)
        predictions.append({'id': batch['id'], 'prediction': tokenizer.decode(outputs[0], skip_special_tokens=True)})
    return predictions

In [None]:
# Define the evaluation function
data_expanded = []
for _id, value in test_data.items():
    temp = {}
    temp["id"] = _id
    p_nctid = value["Primary_id"]     #extract values from data
    s_nctid = value.get("Secondary_id")
    section_id = value["Section_id"]
    statement = value["Statement"]
    primary_evidence = files_data[p_nctid][section_id]
    temp["statement"] = statement
    temp["primary_evidence"] = primary_evidence
    # temp["label"] = value["Label"]

    if s_nctid is not None:
        secondary_evidence = files_data[s_nctid][section_id]
        temp["secondary_evidence"] = secondary_evidence

    data_expanded.append(temp)

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-large-mnli")
model = AutoModelForSequenceClassification.from_pretrained("microsoft/deberta-large-mnli")

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

Some weights of the model checkpoint at albert/albert-base-v2 were not used when initializing AlbertForMaskedLM: ['albert.pooler.weight', 'albert.pooler.bias']
- This IS expected if you are initializing AlbertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
def get_input_text(premise, hypothesis):
    options_prefix = "options: "
    separator = ", "
    options_ = options_prefix + separator.join(["entailment", "contradiction"])
    return f"{premise} [SEP] {hypothesis} [SEP] {options_}"

In [None]:
def get_input_text(premise, hypothesis):
    options_prefix = "OPTIONS:\n- "
    separator = "\n- "
    options_ = options_prefix + f"{separator}".join(["Entailment","Contradiction"])
    return f"{premise} \n Question: Does this imply that {hypothesis}? {options_}"

samples = []
for sample in data_expanded:
    primary_evidence = "".join(sample['primary_evidence'])
    sentence = f"Primary trial evidence are {primary_evidence}"
    secondary_evidence = sample.get("secondary_evidence")
    if secondary_evidence:
        secondary_evidence = "".join(sample['secondary_evidence'])
        sentence = f"{sentence} and Secondary trial evidence are {secondary_evidence}"
    input_text = get_input_text(sentence, sample['statement'])
    #temp = {"text":input_text, "Label":sample['Label']}
    temp = {"text":input_text,"label":get_input_text}
    samples.append(temp)


In [None]:
import torch
import tqdm

# Assuming model and tokenizer are defined
from transformers import BertLMHeadModel

# Assuming you have a pre-trained model path
model_path = "albert/albert-base-v2"

# Load the pre-trained model
model = BertLMHeadModel.from_pretrained(model_path)



# Move the model to CUDA
model.to("cuda")

labels = []
pred = []

# Use torch.no_grad() for inference mode
with torch.no_grad():
    for sample in tqdm.tqdm(samples):
        labels.append(sample["label"])
        input_ids = tokenizer(sample["text"], truncation='longest_first', max_length=512, padding=True, return_tensors="pt").input_ids.to("cuda")

        # Forward pass through the model
        outputs = model(input_ids)

        # Assuming DeBERTa model returns logits for classification
        logits = outputs.logits

        # Get the predicted class index
        predicted_class = torch.argmax(logits, dim=1).item()

        # Append the predicted class (or adjust as needed)
        pred.append("Contradiction" if predicted_class == 1 else "Entailment")

# Move the model back to CPU if needed
model.to("cpu")

You are using a model of type albert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of BertLMHeadModel were not initialized from the model checkpoint at albert/albert-base-v2 and are newly initialized: ['encoder.layer.0.output.LayerNorm.weight', 'encoder.layer.6.intermediate.dense.bias', 'encoder.layer.11.output.dense.bias', 'embeddings.word_embeddings.weight', 'encoder.layer.3.attention.self.key.weight', 'encoder.layer.5.attention.self.key.bias', 'encoder.layer.5.output.dense.weight', 'encoder.layer.7.attention.output.LayerNorm.bias', 'encoder.layer.10.attention.self.query.weight', 'encoder.layer.4.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.8.attention.output.dense.weight', 'encoder.layer.8.intermediate.dense.bias', 'encoder.layer.11.intermediate.dense.bias', 'encoder.layer.10.attention.s

TypeError: argument 'ids': 'list' object cannot be interpreted as an integer

In [None]:
pred

['¶',
 's',
 '+',
 'œ',
 'u',
 'ɩ',
 '٠',
 'ط',
 '&',
 'ۥ',
 'm',
 'ʅ',
 '˄',
 '¢',
 '&',
 'ᵪ',
 'ᅦ',
 'ʺ',
 'ϒ',
 '5',
 '¦',
 'ħ',
 'า',
 'x',
 'ခ',
 'x',
 'ϒ',
 '_',
 'ƙ',
 '_',
 'i',
 'v',
 'ᴪ',
 '\\',
 '⁄',
 'ש',
 'ı',
 'œ',
 '9',
 '¯',
 '±',
 'ย',
 'ɓ',
 '6',
 'i',
 '5',
 '%',
 'ᆼ',
 '-',
 'ˆ',
 'ϒ',
 'ʝ',
 'c',
 'ƞ',
 '˞',
 '´',
 'ð',
 'ə',
 'œ',
 'б',
 'ʁ',
 'b',
 'ψ',
 '،',
 'ŋ',
 '˟',
 'q',
 '˴',
 ';',
 's',
 'ԏ',
 'ˠ',
 's',
 'l',
 'ש',
 'ɿ',
 'ƿ',
 '˴',
 'œ',
 'ʈ',
 'v',
 "'",
 'ϲ',
 'ȶ',
 'ɳ',
 'ʱ',
 '}',
 '2',
 '&',
 '9',
 'œ',
 'ศ',
 'ͻ',
 '}',
 'ᆾ',
 "'",
 'ᅮ',
 '7',
 '%',
 'd',
 '·',
 'б',
 'ˇ',
 '/',
 'm',
 'م',
 'ˑ',
 'j',
 'ϼ',
 'ϼ',
 'ɹ',
 'ß',
 "'",
 'ǂ',
 '¯',
 "'",
 'ј',
 'ʅ',
 '~',
 'ɻ',
 'b',
 'ʊ',
 "'",
 '>',
 '&',
 'ı',
 'k',
 'ʺ',
 'ʈ',
 'ϕ',
 '5',
 'ŋ',
 'b',
 'ǁ',
 'ʰ',
 '،',
 '¬',
 'ƿ',
 '¥',
 'ǁ',
 'e',
 'щ',
 'ʹ',
 'ש',
 '˞',
 '%',
 'ԏ',
 '¥',
 'ʑ',
 '^',
 'ґ',
 'ͳ',
 '1',
 'u',
 '2',
 '\\',
 'q',
 '|',
 'q',
 ']',
 'c',
 '¢',
 'ـ',
 'บ',
 'ɒ',
 '®',
 '

In [None]:
from collections import Counter

# Example Counter result
counter_result = Counter({1: 4726, 0: 774})
# Mapping for Counter values to categories
category_mapping = {0: "Entailment", 1: "Contradiction"}

# Convert Counter values to categories
category_result = {category_mapping[key]: value for key, value in counter_result.items()}
print(category_result)

{'Contradiction': 4726, 'Entailment': 774}


In [None]:
#pred = [p[5:][:-4].strip() for p in pred]

In [None]:
#set(pred)
#from collections import Counter
#Counter(pred)

Counter({'l': 4726, 'adic': 774})

In [None]:
prediction_dict = {}
for _id,pred_x in zip(test_data, pred):
    prediction_dict[str(_id)] = {"Prediction":pred_x}

In [None]:
from sklearn.metrics import f1_score
uuid_list = list(prediction_dict.keys())
results_pred = []
gold_labels = []
for i in range(len(uuid_list)):
     if prediction_dict[uuid_list[i]]["Prediction"] in ["Entailment", "Yes"]:
         results_pred.append(1)
     else:
         results_pred.append(0)
     if test_data[uuid_list[i]]["Type"] in ["Entailment", "No"]:
         gold_labels.append(1)
     else:
         gold_labels.append(0)
f1_score(gold_labels,results_pred)

0.0

In [None]:
prediction_dict
json.dump(prediction_dict, open("deberta large.json", 'w'), indent=4)
