In [1]:
import json
import pandas as pd

TRAIN_PATH = "/kaggle/input/nlp-project-dataset/Training data/train.json"
DEV_PATH = "/kaggle/input/nlp-project-dataset/Training data/dev.json"
TEST_PATH = "/kaggle/input/nlp-project-dataset/Training data/test.json"

###TASK 1
def generate_nli_data(file_path):
    '''
    Generates data from clinical trials for Task 1: Textual entailment (NLI).

    Parameters:
        file_path (str): Path to the JSON of the dataset.

    Returns:
        joint_data: List of training instances in form of "claim [SEP] evidence_text" (str)
        labels: List of labels, either 1 for "Entailment" or 0 for "Contradiction" (int)
    '''

    #Read the file.
    df = pd.read_json(file_path)
    df = df.transpose()

    #Extract claims and labels. Map labels to binary values (0, 1).
    claims = df.Statement.tolist()
    labels = df.Label.tolist()
    labels = list(map(lambda x : 1 if x == "Entailment" else 0, labels))
#     print(labels)

    #(Prepare to) Extract all evidence sentences from clinical trials
    evidence_texts = list()
    if('Secondary_id' in df.columns):
        primary_cts, secondary_cts = df.Primary_id, df.Secondary_id 
        primary_indices = df.Primary_evidence_index 
        secondary_indices = df.Secondary_evidence_index
    else:
        primary_cts = df.Primary_id
        primary_indices = df.Primary_evidence_index 

   
    sections, types = df.Section_id, df.Type

    #Generate evidence texts for each claim.
    for claim_id in range(len(claims)):
        file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"

        with open(file_name, 'r') as f:
            data = json.load(f)
            evidence = "primary trial: " 

            #Evidence for the primary trial is in form:
            # "primary trial: sent_1. sent_2. (...) sent_n."           
            for i in primary_indices[claim_id]:
                evidence += data[sections[claim_id]][i]
                evidence += " "
                
        #If it is a comparative claim, also add evidence sentences from the 2nd trial.
        if types[claim_id] == "Comparison":
            file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"

            #Evidence for the secondary trial is in form:
            # "| secondary trial: sent_1. sent_2. (...) sent_n."
            with open(file_name, 'r') as f:
                data = json.load(f)
                evidence += " | secondary trial: "
                for i in secondary_indices[claim_id]:
                    evidence += data[sections[claim_id]][i]
                    evidence += " "

        evidence_texts.append(evidence)

    #One training instance is: "claim [SEP] full_evidence_text"
    joint_data = list()
    for i in range(len(claims)):
        premise = claims[i]
        hypothesis = evidence_texts[i]
        joint = premise + " [SEP] " + hypothesis
        joint_data.append(joint)

    return joint_data, labels


###TASK 2
def generate_evidence_data(file_path):
    '''
    Generates data from clinical trials for Task 2: Evidence Retrieval (/selection).

    Parameters:
        file_path (str): Path to the JSON of the dataset.

    Returns:
        joint_data: List of training instances in form of "claim [SEP] candidate_sentence" (str)
        labels: List of labels, 0 if candidate_sentence is not evidence, 1 if it is
    '''

    #Read the file.
    df = pd.read_json(file_path)
    df = df.transpose()

    #Extract claims.
    claims = df.Statement.tolist()

    #(Prepare to) Extract all evidence sentences from clinical trials
    primary_cts, secondary_cts = df.Primary_id, df.Secondary_id    
    primary_indices = df.Primary_evidence_index 
    secondary_indices = df.Secondary_evidence_index
    sections, types = df.Section_id, df.Type

    primary_evidence_sentences = list()
    secondary_evidence_sentences = list()

    for idx in range(len(claims)):
        file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[idx] + ".json"

        #Create a list of all evidence sentences from the primary trial for this claim.
        with open(file_name, 'r') as f:
            data = json.load(f)
            primary_evidence_sentences.append(data[sections[idx]])

        #If it is a comparative claim, also create a list of secondary-trial evidence sentences.
        if types[idx] == "Comparison":
            file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[idx] + ".json"

            with open(file_name, 'r') as f:
                data = json.load(f)
                secondary_evidence_sentences.append(data[sections[idx]])
        else:
            secondary_evidence_sentences.append(list())

    #Generate training instances in form of "claim [SEP] candidate_sentence", 
    joint_data = list()

    #Label is 0 if candidate sentece is not evidence for this claim, 1 if it is   
    labels = list() 

    for claim_id in range(len(claims)):
        claim = claims[claim_id]
        primary_sents = primary_evidence_sentences[claim_id]

        for sid in range(len(primary_sents)):
            candidate_sentence = primary_sents[sid]
            j = candidate_sentence + " [SEP] " + claim
            joint_data.append(j)
            labels.append(sid in primary_indices[claim_id])

        if types[claim_id] == "Comparison":
            secondary_sents = secondary_evidence_sentences[claim_id]
            for sid in range(len(secondary_sents)):
                candidate_sentence = secondary_sents[sid]
                j = candidate_sentence + " [SEP] " + claim
                joint_data.append(j)
                labels.append(sid in secondary_indices[claim_id])

        labels = [1 if l else 0 for l in labels]

    return joint_data, labels

In [2]:
import torch
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os
from safetensors.torch import load_file

file_path = "/kaggle/input/finetuned/model.safetensors"

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# from prepare_data import generate_nli_data
best_f1 = 0.0

TRAIN_PATH = "/kaggle/input/nlp-project-dataset/Training data/train.json"
DEV_PATH = "/kaggle/input/nlp-project-dataset/Training data/dev.json"
TEST_PATH = "/kaggle/input/nlp-project-dataset/Training data/test.json"

#Torch dataset used in the models. Consists of encodings of training instances and of labels.
#One training instance is: BERT_TOKENIZER("claim [SEP] evidence_text").
class CtDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


models = ["ynie/xlnet-large-cased-snli_mnli_fever_anli_R1_R2_R3-nli",
"ynie/albert-xxlarge-v2-snli_mnli_fever_anli_R1_R2_R3-nli",
"MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli",
"microsoft/deberta-v2-xlarge-mnli",
"MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"]

def compute_metrics(pred):
#     global best_f1, best_model_path
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    
    # Standard metrics
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    prec = precision_score(labels, preds, average='binary', zero_division=0)  # Handling division by zero
    recall = recall_score(labels, preds, average='binary', zero_division=0)   # Handling division by zero

    faithfulness_metric = sum(preds != labels) / len(preds)
    consistency_metric = sum(preds == labels) / len(preds) 
    
    metrics = {
        "accuracy": acc, 
        "precision": prec, 
        "recall": recall, 
        "f1": f1,
        "faithfulness": faithfulness_metric,
        "consistency": consistency_metric
    }

    
#     if f1 > best_f1:
#         best_f1 = f1
#         output_dir = trainer.args.output_dir
#         best_model_path = os.path.join(output_dir, "best_model")
#         trainer.save_model("best-model")
# #         trainer.tokenizer.save_pretrained(best_model_path)
#         print(f"Best model saved with F1 score: {best_f1:.4f}")
    return metrics


#Training loop.
def train(model_name):
    #model_name = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"

    #Load the models. Adjust max instance length to fit your machine.
    tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=1024, use_safetensors = True)
    model = AutoModelForSequenceClassification.from_pretrained('/kaggle/input/finetuned',
                                 num_labels=2, ignore_mismatched_sizes=True)
#     model = AutoModelForSequenceClassification.from_pretrained(model_name,
#                                  num_labels=2, ignore_mismatched_sizes=True)
#     model.load_state_dict(torch.load('/kaggle/input/finetuned/model.safetensors'))


    #Generate joint claim+[SEP]+evidence data.
    joint_train, labels_train = generate_nli_data(TRAIN_PATH)
    joint_dev, labels_dev= generate_nli_data(DEV_PATH)

    #Tokenize the data.    
    encoded_train = tokenizer(joint_train, return_tensors='pt',
                         truncation_strategy='only_first', add_special_tokens=True, padding=True)
    encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                         truncation_strategy='only_first', add_special_tokens=True, padding=True)
   
    #Convert data into datasets
    train_dataset = CtDataset(encoded_train, labels_train)
    dev_dataset = CtDataset(encoded_dev, labels_dev)

    #Define the batch size to fit your GPU memory.
    batch_size = 8
#     print(joint_train)

    logging_steps = len(joint_train) // batch_size
    output_name = f"finetuned-model"

    training_args = TrainingArguments(output_dir=output_name,
                                 per_device_train_batch_size=batch_size,
                                 per_device_eval_batch_size=batch_size,
                                 
                                 #for faster training time
                                 dataloader_pin_memory=True, 
                                 dataloader_num_workers=4,
                                 gradient_accumulation_steps=2,
                                 fp16=True,

                                 #training hyperparameters
                                 num_train_epochs=30,
                                 learning_rate=2e-05,
                                 weight_decay=0.07,
                                 warmup_ratio=0.1,

                                 #other parameters
                                 evaluation_strategy="epoch",
                                 save_strategy="no",
                                 disable_tqdm=False,
                                 logging_steps=logging_steps,
                                 push_to_hub=False)

    trainer = Trainer(model=model, args=training_args,
                    compute_metrics=compute_metrics,
                    train_dataset=train_dataset,
                    eval_dataset=dev_dataset,
                    tokenizer=tokenizer )
    print(f"Number of epochs: {trainer.args.num_train_epochs}")
    print(f"Batch size: {trainer.args.per_device_train_batch_size}")
    print(f"Learning rate: {trainer.args.learning_rate}")

    #Start the training process.
    trainer.train()

    #Save the fine-tuned NLI (textual entailment) model.
    trainer.save_model("model-nli")

2024-04-28 21:08:44.337011: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-28 21:08:44.337069: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-28 21:08:44.338525: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
#train('MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli')
#train('domenicrosati/debertav3small-NLI4CT')
train('MoritzLaurer/DeBERTa-v3-small-mnli-fever-docnli-ling-2c')

  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Number of epochs: 30
Batch size: 8
Learning rate: 2e-05


[34m[1mwandb[0m: Currently logged in as: [33msrijaalla10[0m ([33mlalla-ub[0m). Use [1m`wandb login --relogin`[0m to force relogin


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Faithfulness,Consistency
0,No log,0.720649,0.505,0.503185,0.79,0.461239,0.495,0.505
2,0.803400,0.654574,0.595,0.579832,0.69,0.591312,0.405,0.595
4,0.680900,0.613573,0.62,0.657895,0.5,0.614448,0.38,0.62
6,0.606600,0.668061,0.63,0.604839,0.75,0.624594,0.37,0.63
8,0.468300,0.806576,0.67,0.677083,0.65,0.669868,0.33,0.67
10,0.372100,0.949617,0.665,0.660194,0.68,0.664925,0.335,0.665
12,0.280900,1.158722,0.655,0.63964,0.71,0.653953,0.345,0.655
14,0.219900,1.113714,0.69,0.702128,0.66,0.689721,0.31,0.69
16,0.189000,1.453788,0.65,0.65,0.65,0.65,0.35,0.65
18,0.155300,1.667954,0.665,0.685393,0.61,0.663984,0.335,0.665


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encod

In [15]:
tokenizer = AutoTokenizer.from_pretrained('MoritzLaurer/DeBERTa-v3-small-mnli-fever-docnli-ling-2c', model_max_length=1024, use_safetensors = True)
model = AutoModelForSequenceClassification.from_pretrained('/kaggle/working/model-nli',num_labels=2, ignore_mismatched_sizes=True)



In [16]:
 #Define the batch size to fit your GPU memory.
DEV_PATH = '/kaggle/input/extra-sebis/Numerical_Statements.json'
joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
encoded_train = tokenizer(joint_train, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8

logging_steps = len(joint_train) // batch_size
output_name = f"finetuned-model-eval"
training_args = TrainingArguments(output_dir=output_name,
                                 per_device_train_batch_size=batch_size,
                                 per_device_eval_batch_size=batch_size,
                                 
                                 #for faster training time
                                 dataloader_pin_memory=True, 
                                 dataloader_num_workers=4,
                                 gradient_accumulation_steps=2,
                                 fp16=True,

                                 #training hyperparameters
                                 num_train_epochs=30,
                                 learning_rate=2e-05,
                                 weight_decay=0.07,
                                 warmup_ratio=0.1,

                                 #other parameters
                                 evaluation_strategy="epoch",
                                 save_strategy="no",
                                 disable_tqdm=False,
                                 logging_steps=logging_steps,
                                 push_to_hub=False)

trainer = Trainer(model=model, args=training_args,
                    compute_metrics=compute_metrics,
                    train_dataset=train_dataset,
                    eval_dataset=dev_dataset,
                    tokenizer=tokenizer )

  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [17]:
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
print(f"Evaluation metrics on new dataset: {new_eval_metrics}")


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Evaluation metrics on new dataset: {'eval_loss': 2.8241848945617676, 'eval_accuracy': 0.6451612903225806, 'eval_precision': 0.6046511627906976, 'eval_recall': 0.6190476190476191, 'eval_f1': 0.6454919495744642, 'eval_faithfulness': 0.3548387096774194, 'eval_consistency': 0.6451612903225806, 'eval_runtime': 1.8007, 'eval_samples_per_second': 51.647, 'eval_steps_per_second': 6.664}


In [18]:
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,2.824185,0.645161,0.604651,0.619048,0.645492,0.354839,0.645161


In [19]:
DEV_PATH = '/kaggle/input/extra-sebis/Non_Numerical_Statements.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)



  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,2.476839,0.663551,0.661765,0.775862,0.657786,0.336449,0.663551


In [20]:
DEV_PATH = '/kaggle/input/extra-sebis/Comparison.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,3.00234,0.6,0.588235,0.666667,0.598214,0.4,0.6


In [21]:
DEV_PATH = '/kaggle/input/extra-sebis/Adverse Events.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,2.923334,0.596154,0.580645,0.692308,0.592385,0.403846,0.596154


In [22]:
DEV_PATH = '/kaggle/input/extra-sebis/Eligibility.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,1.768191,0.75,0.733333,0.785714,0.749681,0.25,0.75


In [23]:
DEV_PATH = '/kaggle/input/extra-sebis/Intervention.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,2.985244,0.583333,0.578947,0.611111,0.583012,0.416667,0.583333


In [24]:
DEV_PATH = '/kaggle/input/extra-sebis/Results.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + secondary_cts[claim_id] + ".json"
  for i in secondary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) 

Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,3.020997,0.660714,0.645161,0.714286,0.659738,0.339286,0.660714


In [25]:
DEV_PATH = '/kaggle/input/extra-sebis/single.json'
# joint_train, labels_train = generate_nli_data(TRAIN_PATH)
joint_dev, labels_dev= generate_nli_data(DEV_PATH)

        #Tokenize the data.    
# encoded_train = tokenizer(joint_train, return_tensors='pt',
#                              truncation_strategy='only_first', add_special_tokens=True, padding=True)
encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                             truncation_strategy='only_first', add_special_tokens=True, padding=True)

        #Convert data into datasets
# train_dataset = CtDataset(encoded_train, labels_train)
dev_dataset = CtDataset(encoded_dev, labels_dev)
batch_size = 8
new_eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
results = []
# for epoch in range(trainer.args.num_train_epochs):
for epoch in range(1):
    eval_metrics = trainer.evaluate(eval_dataset=dev_dataset)
    results.append([
        eval_metrics['eval_loss'],
        eval_metrics['eval_accuracy'],
        eval_metrics['eval_precision'],
        eval_metrics['eval_recall'],
        eval_metrics['eval_f1'],
        eval_metrics['eval_faithfulness'],
        eval_metrics['eval_consistency'],        
    ])
columns = ['VAL loss', 'ACC', 'PREC', 'REC', 'F1', 'Faithfulness', 'Consistency']
pd.DataFrame(results, columns=columns)


  file_name = "/kaggle/input/nlp-project-dataset/Training data/CT json/" + primary_cts[claim_id] + ".json"
  for i in primary_indices[claim_id]:
  evidence += data[sections[claim_id]][i]
  if types[claim_id] == "Comparison":
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Unnamed: 0,VAL loss,ACC,PREC,REC,F1,Faithfulness,Consistency
0,2.482361,0.678571,0.662338,0.728571,0.677766,0.321429,0.678571
