In [34]:
import json
import pandas as pd

TRAIN_PATH = "/kaggle/input/nlp-project-dataset/Training data/train.json"
DEV_PATH = "/kaggle/input/nlp-project-dataset/Training data/dev.json"
TEST_PATH = "/kaggle/input/nlp-project-dataset/Training data/test.json"

###TASK 1
def generate_nli_data(file_path):
    '''
    Generates data from clinical trials for Task 1: Textual entailment (NLI).

    Parameters:
        file_path (str): Path to the JSON of the dataset.

    Returns:
        joint_data: List of training instances in form of "claim [SEP] evidence_text" (str)
        labels: List of labels, either 1 for "Entailment" or 0 for "Contradiction" (int)
    '''

    #Read the file.
    df = pd.read_json(file_path)
    df = df.transpose()

    #Extract claims and labels. Map labels to binary values (0, 1).
    claims = df.Statement.tolist()
    labels = df.Label.tolist()
    labels = list(map(lambda x : 1 if x == "Entailment" else 0, labels))
#     print(labels)

    #(Prepare to) Extract all evidence sentences from clinical trials
    evidence_texts = list()
    if('Secondary_id' in df.columns):
        primary_cts, secondary_cts = df.Primary_id, df.Secondary_id 
        primary_indices = df.Primary_evidence_index 
        secondary_indices = df.Secondary_evidence_index
    else:
        primary_cts = df.Primary_id
        primary_indices = df.Primary_evidence_index 

   
    sections, types = df.Section_id, df.Type

    #Generate evidence texts for each claim.
    for claim_id in range(len(claims)):
        file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"

        with open(file_name, 'r') as f:
            data = json.load(f)
            evidence = "primary trial: " 

            #Evidence for the primary trial is in form:
            # "primary trial: sent_1. sent_2. (...) sent_n."           
            for i in primary_indices[claim_id]:
                evidence += data[sections[claim_id]][i]
                evidence += " "
                
        #If it is a comparative claim, also add evidence sentences from the 2nd trial.
        if types[claim_id] == "Comparison":
            file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"

            #Evidence for the secondary trial is in form:
            # "| secondary trial: sent_1. sent_2. (...) sent_n."
            with open(file_name, 'r') as f:
                data = json.load(f)
                evidence += " | secondary trial: "
                for i in secondary_indices[claim_id]:
                    evidence += data[sections[claim_id]][i]
                    evidence += " "

        evidence_texts.append(evidence)

    #One training instance is: "claim [SEP] full_evidence_text"
    joint_data = list()
    for i in range(len(claims)):
        premise = claims[i]
        hypothesis = evidence_texts[i]
        joint = premise + " [SEP] " + hypothesis
        joint_data.append(joint)

    return joint_data, labels


###TASK 2
def generate_evidence_data(file_path):
    '''
    Generates data from clinical trials for Task 2: Evidence Retrieval (/selection).

    Parameters:
        file_path (str): Path to the JSON of the dataset.

    Returns:
        joint_data: List of training instances in form of "claim [SEP] candidate_sentence" (str)
        labels: List of labels, 0 if candidate_sentence is not evidence, 1 if it is
    '''

    #Read the file.
    df = pd.read_json(file_path)
    df = df.transpose()

    #Extract claims.
    claims = df.Statement.tolist()

    #(Prepare to) Extract all evidence sentences from clinical trials
    primary_cts, secondary_cts = df.Primary_id, df.Secondary_id    
    primary_indices = df.Primary_evidence_index 
    secondary_indices = df.Secondary_evidence_index
    sections, types = df.Section_id, df.Type

    primary_evidence_sentences = list()
    secondary_evidence_sentences = list()

    for idx in range(len(claims)):
        file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[idx] + ".json"

        #Create a list of all evidence sentences from the primary trial for this claim.
        with open(file_name, 'r') as f:
            data = json.load(f)
            primary_evidence_sentences.append(data[sections[idx]])

        #If it is a comparative claim, also create a list of secondary-trial evidence sentences.
        if types[idx] == "Comparison":
            file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[idx] + ".json"

            with open(file_name, 'r') as f:
                data = json.load(f)
                secondary_evidence_sentences.append(data[sections[idx]])
        else:
            secondary_evidence_sentences.append(list())

    #Generate training instances in form of "claim [SEP] candidate_sentence", 
    joint_data = list()

    #Label is 0 if candidate sentece is not evidence for this claim, 1 if it is   
    labels = list() 

    for claim_id in range(len(claims)):
        claim = claims[claim_id]
        primary_sents = primary_evidence_sentences[claim_id]

        for sid in range(len(primary_sents)):
            candidate_sentence = primary_sents[sid]
            j = candidate_sentence + " [SEP] " + claim
            joint_data.append(j)
            labels.append(sid in primary_indices[claim_id])

        if types[claim_id] == "Comparison":
            secondary_sents = secondary_evidence_sentences[claim_id]
            for sid in range(len(secondary_sents)):
                candidate_sentence = secondary_sents[sid]
                j = candidate_sentence + " [SEP] " + claim
                joint_data.append(j)
                labels.append(sid in secondary_indices[claim_id])

        labels = [1 if l else 0 for l in labels]

    return joint_data, labels

In [2]:
import torch
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os
from safetensors.torch import load_file

file_path = "/kaggle/input/finetuned/model.safetensors"

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# from prepare_data import generate_nli_data

TRAIN_PATH = "/kaggle/input/nlp-project-dataset/Training data/train.json"
DEV_PATH = "/kaggle/input/nlp-project-dataset/Training data/dev.json"
TEST_PATH = "/kaggle/input/nlp-project-dataset/Training data/test.json"

#Torch dataset used in the models. Consists of encodings of training instances and of labels.
#One training instance is: BERT_TOKENIZER("claim [SEP] evidence_text").
class CtDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


models = ["ynie/xlnet-large-cased-snli_mnli_fever_anli_R1_R2_R3-nli",
"ynie/albert-xxlarge-v2-snli_mnli_fever_anli_R1_R2_R3-nli",
"MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli",
"microsoft/deberta-v2-xlarge-mnli",
"MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"]

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    
    # Standard metrics
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    prec = precision_score(labels, preds, average='binary', zero_division=0)  # Handling division by zero
    recall = recall_score(labels, preds, average='binary', zero_division=0)   # Handling division by zero

    faithfulness_metric = sum(preds != labels) / len(preds)
    consistency_metric = sum(preds == labels) / len(preds) 
    
    metrics = {
        "accuracy": acc, 
        "precision": prec, 
        "recall": recall, 
        "f1": f1,
        "faithfulness": faithfulness_metric,
        "consistency": consistency_metric
    }

    return metrics


#Training loop.
def train(model_name, eval_val):
    #model_name = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"

    #Load the models. Adjust max instance length to fit your machine.
    tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=1024, use_safetensors = True)
#     model = AutoModelForSequenceClassification.from_pretrained('/kaggle/input/finetuned',
#                                  num_labels=2, ignore_mismatched_sizes=True)
    model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                 num_labels=2, ignore_mismatched_sizes=True)
#     model.load_state_dict(torch.load('/kaggle/input/finetuned/model.safetensors'))


    #Generate joint claim+[SEP]+evidence data.
    joint_train, labels_train = generate_nli_data(TRAIN_PATH)
    joint_dev, labels_dev= generate_nli_data(DEV_PATH)

    #Tokenize the data.    
    encoded_train = tokenizer(joint_train, return_tensors='pt',
                         truncation_strategy='only_first', add_special_tokens=True, padding=True)
    encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                         truncation_strategy='only_first', add_special_tokens=True, padding=True)
   
    #Convert data into datasets
    train_dataset = CtDataset(encoded_train, labels_train)
    dev_dataset = CtDataset(encoded_dev, labels_dev)

    #Define the batch size to fit your GPU memory.
    batch_size = 8
#     print(joint_train)

    logging_steps = len(joint_train) // batch_size
    output_name = f"finetuned-model"

    training_args = TrainingArguments(output_dir=output_name,
                                 per_device_train_batch_size=batch_size,
                                 per_device_eval_batch_size=batch_size,
                                 
                                 #for faster training time
                                 dataloader_pin_memory=True, 
                                 dataloader_num_workers=4,
                                 gradient_accumulation_steps=2,
                                 fp16=True,

                                 #training hyperparameters
                                 num_train_epochs=30,
                                 learning_rate=2e-05,
                                 weight_decay=0.07,
                                 warmup_ratio=0.1,

                                 #other parameters
                                 evaluation_strategy="epoch",
                                 save_strategy="no",
                                 disable_tqdm=False,
                                 logging_steps=logging_steps,
                                 push_to_hub=False)

    trainer = Trainer(model=model, args=training_args,
                    compute_metrics=compute_metrics,
                    train_dataset=train_dataset,
                    eval_dataset=dev_dataset,
                    tokenizer=tokenizer )
    print(f"Number of epochs: {trainer.args.num_train_epochs}")
    print(f"Batch size: {trainer.args.per_device_train_batch_size}")
    print(f"Learning rate: {trainer.args.learning_rate}")

    #Start the training process.
    if(eval_val = True):
    trainer.train()

    #Save the fine-tuned NLI (textual entailment) model.
    trainer.save_model("model-nli")

2024-04-28 17:58:17.617591: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-28 17:58:17.617648: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-28 17:58:17.619078: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
#train('MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli')
#train('domenicrosati/debertav3small-NLI4CT')
train('MoritzLaurer/DeBERTa-v3-small-mnli-fever-docnli-ling-2c')



config.json:   0%|          | 0.00/1.10k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/568M [00:00<?, ?B/s]

  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Number of epochs: 30
Batch size: 8
Learning rate: 2e-05


[34m[1mwandb[0m: Currently logged in as: [33msrijaalla10[0m ([33mlalla-ub[0m). Use [1m`wandb login --relogin`[0m to force relogin


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Faithfulness,Consistency
0,No log,0.858782,0.485,0.489933,0.73,0.452113,0.515,0.485
2,0.957300,0.723737,0.535,0.519337,0.94,0.443763,0.465,0.535
4,0.694700,0.667582,0.59,0.6125,0.49,0.585859,0.41,0.59
6,0.662100,0.671046,0.63,0.627451,0.64,0.629963,0.37,0.63
8,0.586100,0.766657,0.645,0.612403,0.79,0.637376,0.355,0.645
10,0.467100,0.887614,0.625,0.621359,0.64,0.624916,0.375,0.625
12,0.353200,1.083016,0.61,0.603774,0.64,0.609649,0.39,0.61
14,0.266100,1.283011,0.615,0.618557,0.6,0.614913,0.385,0.615
16,0.215000,1.757001,0.615,0.616162,0.61,0.61499,0.385,0.615
18,0.185500,1.994388,0.61,0.619565,0.57,0.609375,0.39,0.61


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encod

In [7]:
tokenizer = AutoTokenizer.from_pretrained('MoritzLaurer/DeBERTa-v3-small-mnli-fever-docnli-ling-2c', model_max_length=1024, use_safetensors = True)
model = AutoModelForSequenceClassification.from_pretrained('/kaggle/working/model-nli',num_labels=2, ignore_mismatched_sizes=True)



In [21]:
 #Define the batch size to fit your GPU memory.
DEV_PATH = '/kaggle/input/extra-sebis/Numerical_Statements.json'
joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
encoded_train = tokenizer(joint_train, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8

logging_steps = len(joint_train) // batch_size
output_name = f"finetuned-model-eval"
training_args = TrainingArguments(output_dir=output_name,
                                 per_device_train_batch_size=batch_size,
                                 per_device_eval_batch_size=batch_size,
                                 
                                 #for faster training time
                                 dataloader_pin_memory=True, 
                                 dataloader_num_workers=4,
                                 gradient_accumulation_steps=2,
                                 fp16=True,

                                 #training hyperparameters
                                 num_train_epochs=30,
                                 learning_rate=2e-05,
                                 weight_decay=0.07,
                                 warmup_ratio=0.1,

                                 #other parameters
                                 evaluation_strategy="epoch",
                                 save_strategy="no",
                                 disable_tqdm=False,
                                 logging_steps=logging_steps,
                                 push_to_hub=False)

trainer = Trainer(model=model, args=training_args,
                    compute_metrics=compute_metrics,
                    train_dataset=train_dataset,
                    eval_dataset=dev_dataset,
                    tokenizer=tokenizer )

  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [22]:
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
print(f"Evaluation metrics on new dataset: {new_eval_metrics}")


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Evaluation metrics on new dataset: {'eval_loss': 2.8413820266723633, 'eval_accuracy': 0.6666666666666666, 'eval_precision': 0.6410256410256411, 'eval_recall': 0.5952380952380952, 'eval_f1': 0.6652557319223986, 'eval_faithfulness': 0.3333333333333333, 'eval_consistency': 0.6666666666666666, 'eval_runtime': 1.7991, 'eval_samples_per_second': 51.692, 'eval_steps_per_second': 6.67}


In [23]:
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,2.841382,0.666667,0.641026,0.595238,0.665256,0.333333,0.666667


In [24]:
DEV_PATH = '/kaggle/input/extra-sebis/Non_Numerical_Statements.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)



  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,3.189567,0.607477,0.617647,0.724138,0.60075,0.392523,0.607477


In [26]:
DEV_PATH = '/kaggle/input/extra-sebis/Comparison.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,3.03807,0.65,0.628571,0.733333,0.647552,0.35,0.65


In [27]:
DEV_PATH = '/kaggle/input/extra-sebis/Adverse Events.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,2.799125,0.634615,0.62069,0.692308,0.633395,0.365385,0.634615


In [28]:
DEV_PATH = '/kaggle/input/extra-sebis/Eligibility.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,2.175565,0.642857,0.653846,0.607143,0.642401,0.357143,0.642857


In [29]:
DEV_PATH = '/kaggle/input/extra-sebis/Intervention.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,3.868942,0.638889,0.631579,0.666667,0.63861,0.361111,0.638889


In [30]:
DEV_PATH = '/kaggle/input/extra-sebis/Results.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,3.551571,0.625,0.606061,0.714286,0.621986,0.375,0.625


In [35]:
DEV_PATH = '/kaggle/input/extra-sebis/single.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,3.0232,0.628571,0.625,0.642857,0.628496,0.371429,0.628571
