## Load data (Claims and evidences)

In [1]:
# Directory that holds the claim/evidence JSON files read by the cells below
# (relative to the notebook's working directory).
data_path = "data"

In [2]:
import pandas as pd
import torch
import numpy as np

# Evidence corpus: with orient='index' the JSON keys become the row index; the first
# column's values are kept as a plain list. Later cells index this list with the
# integer suffix of "evidence-<n>" ids.
evidence = pd.read_json(f'{data_path}/evidence.json',orient='index')
evidences = evidence.iloc[:,0].tolist()

# Development data: full frame plus the first column (the claim text) as a list
dev_data = pd.read_json(f'{data_path}/dev-claims.json',orient='index')
dev_claims = dev_data.iloc[:,0].tolist()

# Training data: full frame plus the first column (the claim text) as a list
train_data = pd.read_json(f'{data_path}/train-claims.json',orient='index')
train_claims = train_data.iloc[:,0].tolist()

# Testing data: unlabelled claims only
test_data = pd.read_json(f'{data_path}/test-claims-unlabelled.json',orient='index')
test_claims = test_data.iloc[:,0].tolist()

# NOTE(review): dev_baseline is loaded here but not referenced by any other visible cell.
dev_baseline = pd.read_json(f'{data_path}/dev-claims-baseline.json',orient='index')


### Print out the Data structure

In [3]:
# Show the column names and the (rows, columns) shape of each split.
print(f"Training Data columns: {train_data.columns}")
print(f"Shape: {train_data.shape}\n")

print(f"Development Data columns: {dev_data.columns}")
print(f"Shape: {dev_data.shape}\n")

print(f"Testing Data columns: {test_data.columns}")
print(f"Shape: {test_data.shape}\n")


Training Data columns: Index(['claim_text', 'claim_label', 'evidences'], dtype='object')
Shape: (1228, 3)

Development Data columns: Index(['claim_text', 'claim_label', 'evidences'], dtype='object')
Shape: (154, 3)

Testing Data columns: Index(['claim_text'], dtype='object')
Shape: (153, 1)



# Retriever

## DPR 


In [39]:
import torch
from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer
from tqdm import tqdm


device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def _dpr_encode(texts, encoder, tokenizer, batch_size, desc):
  """Encode `texts` batch by batch with a DPR encoder and return the stacked pooled outputs.

  Shared by `evidences_dpr_embedding` and `claims_dpr_embedding`, which differ only in
  the checkpoint they load and in the progress-bar label.

  Args:
    texts: list of strings to encode.
    encoder: DPR encoder already moved to the notebook-level `device`.
    tokenizer: tokenizer matching `encoder`.
    batch_size: number of texts per forward pass.
    desc: label shown on the tqdm progress bar.

  Returns:
    A CPU tensor with one row per input text.
  """
  # Mixed precision is only requested on CUDA. Disabling it explicitly elsewhere avoids
  # the warning that the CUDA-only autocast context emits on CPU-only machines.
  use_amp = device.type == 'cuda'
  batch_embeddings = []

  for start in tqdm(range(0, len(texts), batch_size), desc=desc):
    batch = texts[start:start+batch_size]
    # Move the tokenized inputs to the same device as the encoder.
    inputs = tokenizer(batch, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)

    with torch.no_grad():
      with torch.autocast(device_type=device.type, enabled=use_amp):
        embeddings = encoder(**inputs).pooler_output
      # Move each batch off the device before accumulating it.
      batch_embeddings.append(embeddings.cpu())

  # Return the embedding result as a single tensor.
  return torch.cat(batch_embeddings)


def evidences_dpr_embedding(evidences,batch_size=64):
  """Embed evidence passages with the DPR context encoder.

  Loads 'facebook/dpr-ctx_encoder-single-nq-base' on every call, so call it once
  for the whole corpus rather than once per passage.

  Args:
    evidences: list of evidence strings.
    batch_size: number of passages per forward pass.

  Returns:
    A CPU tensor with one row per evidence.
  """
  context_encoder = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base').to(device)
  context_tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')

  return _dpr_encode(evidences, context_encoder, context_tokenizer, batch_size, "Encoding Evidences")


def claims_dpr_embedding(claims,batch_size=64):
  """Embed claims with the DPR question encoder.

  Loads 'facebook/dpr-question_encoder-single-nq-base' on every call.

  Args:
    claims: list of claim strings.
    batch_size: number of claims per forward pass.

  Returns:
    A CPU tensor with one row per claim.
  """
  question_encoder = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base').to(device)
  question_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')

  return _dpr_encode(claims, question_encoder, question_tokenizer, batch_size, "Encoding Claims")



### Load DPR Embeddings of evidences and claims as tensors

In [40]:
import torch
import pdb
from torch.nn.functional import cosine_similarity


def find_top_n_evidence(claim_embedding, evidences_embeddings, n=5):
    """
    Calculates cosine similarities between a claim embedding and a set of evidence embeddings,
    returning the indices and scores of the top `n` most similar evidence embeddings.

    Input:
    - claim_embedding: Tensor of shape [1, d], the embedding of the claim.
    - evidences_embeddings: Tensor of shape [m, d], the embeddings of the evidence documents.
      Must live on the same device as `claim_embedding`.
    - n: Integer, the number of top similar evidence to return (default is 5).
      Values larger than `m` are clamped to `m`.

    Returns:
    - top_n_indices: List of integers, indices of the top `n` most similar evidence embeddings,
      ordered from most to least similar.
    - top_n_scores: List of floats, cosine similarity scores of the top `n` most similar evidence embeddings.
    """
    scores = cosine_similarity(claim_embedding, evidences_embeddings)
    # torch.topk raises when k exceeds the number of candidates, so clamp the request.
    k = min(n, scores.shape[-1])
    top_n_evidences = torch.topk(scores, k=k)

    top_n_indices = top_n_evidences.indices.tolist()
    top_n_scores = top_n_evidences.values.tolist()

    return top_n_indices, top_n_scores



def dpr_search(claim_index, claim_embeddings, claim_data, evidences_embeddings, n=5):
    """Print the top `n` retrieved evidences for one claim next to its stored fields.

    Args:
        claim_index: positional row of the claim in both `claim_embeddings` and `claim_data`.
        claim_embeddings: tensor holding one embedding per claim.
        claim_data: DataFrame whose columns are, by position, claim text and
            (when present) label and gold evidence ids.
        evidences_embeddings: tensor holding one embedding per evidence.
        n: number of evidences to retrieve.

    Reads the notebook-level `evidences` list to print the retrieved passages.
    """
    # Put the query on the same device as the evidence matrix; moving it to the global
    # `device` fails when the evidence embeddings were left on another device.
    claim = claim_embeddings[claim_index].unsqueeze(0).to(evidences_embeddings.device)
    best_evidence_indices, scores = find_top_n_evidence(claim, evidences_embeddings, n)
    print(f"Best evidence indices: {best_evidence_indices}\nScores: {scores}\n")

    claim_row = claim_data.iloc[claim_index]
    print(f"Claim: {claim_row.iloc[0]}\n")
    # Unlabelled frames only carry the claim text, so the remaining fields are optional.
    if len(claim_row) > 1:
        print(f"Label: {claim_row.iloc[1]}\n")
    if len(claim_row) > 2:
        print(f"Evidences: {claim_row.iloc[2]}\n")

    print("-- Best evidences --\n")
    for i in best_evidence_indices:
        print(f'evidence - {i}')
        print(evidences[i])
        print("============================")




## BM25

In [12]:
pip install rank_bm25

Note: you may need to restart the kernel to use updated packages.


In [5]:
from rank_bm25 import BM25Okapi
# Build the BM25 index over the whole evidence corpus.
corpus = evidences

# Tokenise by splitting on single spaces only (no lowercasing or punctuation handling);
# the query cells below split their queries the same way.
tokenized_corpus = [doc.split(" ") for doc in corpus]

bm25 = BM25Okapi(tokenized_corpus)


In [20]:
# Disabled experiment: top-3 BM25 hits for the first three dev claims.
# for claim in tqdm(dev_claims[:3]):
#   print(claim,'\n')
#   query = claim
#   tokenized_query = query.split(" ")

#   print(bm25.get_top_n(tokenized_query, corpus, n=3))

# Sanity check: query the index with a toy string and show the best match two ways.
tokenized_query = "hello world".split(" ")
print(bm25.get_top_n(tokenized_query, corpus, n=1))
scores = bm25.get_scores(tokenized_query)
# Evidence positions ordered from highest to lowest BM25 score.
sorted_indexes = np.argsort(scores)[::-1]  

print(sorted_indexes)
print(evidences[sorted_indexes[0]])




["After hearing Karim's opinion, Salhi would no longer speak with him other than to say hello and goodbye."]
[323573 218762  77646 ... 804894 804895 804886]
After hearing Karim's opinion, Salhi would no longer speak with him other than to say hello and goodbye.


In [None]:
# Evaluate BM25 retrieval on the dev set: top-n evidence positions per claim versus
# the gold evidence ids.
top_n = 5  # number of evidences retrieved per claim

# The retrieved positions are rebuilt here so the cell no longer depends on variables
# left over in the kernel; queries are tokenised exactly like the indexed corpus.
all_bm25_findings = np.concatenate([
    np.argsort(bm25.get_scores(claim.split(" ")))[::-1][:top_n]
    for claim in dev_claims
])
print(all_bm25_findings.size)
dev_evidences_list = np.concatenate(dev_data.iloc[:, 2].apply(lambda ev_list: [int(evidence.split('-')[1]) for evidence in ev_list]).values)

# NOTE(review): intersect1d compares the pooled, de-duplicated ids of all claims, so a
# passage retrieved for one claim also counts when it is gold evidence for another.
matched = np.intersect1d(all_bm25_findings, dev_evidences_list).size

print(f"Match rate of n = {top_n}")
print(f"matched evidence/all finded evidence = {matched/all_bm25_findings.size}")
print(f"matched evidence/number of evidence = {matched/dev_evidences_list.size}")

770
Match rate of n = 5
matched evidence/all finded evidence = 0.07402597402597402
matched evidence/number of evidence = 0.11608961303462322


## Sentence-BERT

### Generating SBERT Embedding for claims and evidences

In [22]:
pip install -U sentence-transformers


Note: you may need to restart the kernel to use updated packages.


**Evidences** **Embedding**

In [23]:
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def sbert_embedding(sentences,batch_size=128):
  """Encode `sentences` with the "all-MiniLM-L6-v2" Sentence-BERT model.

  The model is loaded onto the notebook-level `device` on every call. Sentences are
  fed through a non-shuffling DataLoader in chunks of `batch_size`, and the per-chunk
  tensors are concatenated along dim 0 in input order.

  Returns:
    One tensor holding an embedding row per sentence.
  """
  encoder = SentenceTransformer("all-MiniLM-L6-v2").to(device)
  chunks = DataLoader(sentences, batch_size, shuffle=False)

  encoded_chunks = []
  progress = tqdm(chunks, desc="Calculating embeddings in batches")
  for chunk in progress:
    encoded_chunks.append(encoder.encode(chunk, convert_to_tensor=True, device=device))

  return torch.cat(encoded_chunks, dim=0)



#torch.save(sbert_embedding(evidences), 'SBERT_evidences_embeddings.pt')


# Classifier

## BERT-Based

### Load the necessary python packages

In [7]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup, AlbertTokenizer, AlbertForSequenceClassification
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from tqdm import tqdm
import seaborn as sns
from sklearn.metrics import classification_report, accuracy_score, f1_score
from transformers import RobertaTokenizer, RobertaModel, RobertaForSequenceClassification
import os
from datetime import datetime

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# torch.cuda.get_device_name raises when no CUDA device is present, so only query it
# when the notebook is actually running on a GPU.
gpu_name = torch.cuda.get_device_name(0) if device.type == 'cuda' else 'CPU (no CUDA device available)'
print(gpu_name)

  from .autonotebook import tqdm as notebook_tqdm


NVIDIA GeForce RTX 4090


In [25]:
# RoBERTa model for fine-tuning.
# `tokenizer` and `model` are notebook-level names; the prediction cell rebinds both.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
# num_labels=3 matches the three classes of the training label map
# (SUPPORTS / REFUTES / NOT_ENOUGH_INFO).
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=3).to(device)

# BERT-base alternative for fine-tuning (disabled)
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Data Preprocessing

In [27]:
def extract_evidence_pairs_with_content(df, evidences_list, labels_of_interest=['SUPPORTS', 'REFUTES', 'NOT_ENOUGH_INFO']):
    """Expand every claim into one (claim, evidence text, label) row per gold evidence.

    Args:
        df: frame with 'claim_text', 'claim_label' and 'evidences' columns, where
            'evidences' holds ids of the form "<prefix>-<n>".
        evidences_list: evidence texts; the integer after the dash in an id is used
            as the position in this list.
        labels_of_interest: claims whose label is not in this collection are skipped.

    Returns:
        DataFrame with columns 'claim_text', 'evidence' and 'label'.
    """
    pairs = []

    for _, record in df.iterrows():
        text = record['claim_text']
        verdict = record['claim_label']
        if verdict not in labels_of_interest:
            continue
        pairs.extend(
            (text, evidences_list[int(ev_id.split('-')[1])], verdict)
            for ev_id in record['evidences']
        )

    return pd.DataFrame(pairs, columns=['claim_text', 'evidence', 'label'])

class ClaimEvidenceDataset(Dataset):
    """Torch dataset that tokenizes (claim, evidence) pairs for sequence classification.

    Args:
        data: DataFrame with 'claim_text', 'evidence' and integer 'label' columns.
        tokenizer: tokenizer exposing `encode_plus`.
        max_len: length every pair is padded/truncated to. The previous code stored
            this value but always tokenized with a hard-coded 256; the argument is
            now honoured, and its default is 256 so that callers relying on the
            default get exactly the same encodings as before.
    """

    def __init__(self, data, tokenizer, max_len=256):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return input ids, attention mask and label tensor for row `idx`."""
        # Look the row up once instead of once per field.
        row = self.data.iloc[idx]
        claim_text = str(row['claim_text'])
        evidence = str(row['evidence'])
        label = row['label']

        encoding = self.tokenizer.encode_plus(
            claim_text,
            evidence,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        # The tokenizer returns tensors with a leading batch dimension of 1; flatten
        # them so the DataLoader can stack samples into a batch.
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            #'token_type_ids': encoding['token_type_ids'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }



# Hold out 10% of the training claims for validation. The split is made on claims,
# before pairing, so all pairs of one claim end up in the same split.
model_train_data, model_val_data = train_test_split(train_data, test_size=0.1, random_state=12)

# One (claim, evidence, label) row per gold evidence; claims whose label is not in the
# function's default label list are dropped.
pair_training_data = extract_evidence_pairs_with_content(model_train_data, evidences)
pair_val_data = extract_evidence_pairs_with_content(model_val_data, evidences)
pair_dev_data = extract_evidence_pairs_with_content(dev_data, evidences)

# String label -> class id expected by the classifier.
# NOTE(review): the prediction cell later rebinds `label_map` to the inverse mapping.
label_map = {"SUPPORTS": 0, "REFUTES": 1, "NOT_ENOUGH_INFO": 2}
pair_training_data['label'] = pair_training_data['label'].map(label_map)
pair_val_data['label'] = pair_val_data['label'].map(label_map)
pair_dev_data['label'] = pair_dev_data['label'].map(label_map)


train_dataset = ClaimEvidenceDataset(pair_training_data, tokenizer)
val_dataset = ClaimEvidenceDataset(pair_val_data, tokenizer)
dev_dataset = ClaimEvidenceDataset(pair_dev_data, tokenizer)

# Only the training loader shuffles; evaluation loaders keep row order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
dev_loader = DataLoader(dev_dataset, batch_size=16, shuffle=False)


In [22]:
# Disabled export of the training pairs (hard-coded Google Drive path).
# pair_training_data.to_json('/content/drive/MyDrive/Colab Notebooks/NLP project/FactChecker_NLP/pair_training_data.json')
# Preview the first (claim, evidence, label) training pairs.
pair_training_data.head()

Unnamed: 0,claim_text,evidence,label
0,"With that in mind, they propose a plausible an...",Mind Meld: Secrets Behind the Voyage of a Life...,2
1,"With that in mind, they propose a plausible an...",Naturalistic dualism comes from Australian phi...,2
2,"With that in mind, they propose a plausible an...",Chalmers' argument is that it seems plausible ...,2
3,"With that in mind, they propose a plausible an...",The quantum mind or quantum consciousness is a...,2
4,"With that in mind, they propose a plausible an...",He proposed a scenario with a cat in a locked ...,2


### BERT Model Training (method definitions)

In [25]:

def train_epoch(model, data_loader, optimizer, scheduler, device, epoch=None):
    """Run one training pass over `data_loader`.

    Args:
        model: sequence-classification model returning `.loss` and `.logits` when
            called with `labels`.
        data_loader: loader yielding dicts with 'input_ids', 'attention_mask', 'label'.
        optimizer: optimizer stepped once per batch.
        scheduler: learning-rate scheduler stepped once per batch.
        device: device the batches are moved to.
        epoch: zero-based epoch number, used only for the progress-bar label. When
            omitted, a module-level `epoch` variable is used if one exists (this is
            what the function used to read unconditionally); otherwise the label
            carries no number.

    Returns:
        (accuracy, mean_loss): accuracy as a float64 tensor over the whole dataset
        and the mean of the per-batch losses.
    """
    if epoch is None:
        # Backward compatibility with callers that rely on the notebook-level loop
        # variable instead of passing the epoch explicitly.
        epoch = globals().get('epoch')
    description = "Train Epoch" if epoch is None else f"Train Epoch [{epoch + 1}]"

    model = model.train()
    losses = []
    # Tensor accumulator so `.double()` below also works when the loader is empty.
    correct_predictions = torch.zeros((), dtype=torch.long, device=device)

    loop = tqdm(data_loader, leave=True)
    loop.set_description(description)

    for batch in loop:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        #token_type_ids = batch['token_type_ids'].to(device)
        labels = batch['label'].to(device)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            #token_type_ids=token_type_ids,
            labels=labels
        )

        loss = outputs.loss
        logits = outputs.logits

        _, preds = torch.max(logits, dim=1)
        correct_predictions += torch.sum(preds == labels)
        losses.append(loss.item())

        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        loop.set_postfix(loss=loss.item())

    return correct_predictions.double() / len(data_loader.dataset), np.mean(losses)

def eval_model(model, data_loader, device):
    """Evaluate `model` on `data_loader` without updating it.

    Args:
        model: sequence-classification model returning `.loss` and `.logits` when
            called with `labels`.
        data_loader: loader yielding dicts with 'input_ids', 'attention_mask', 'label'.
        device: device the batches are moved to.

    Returns:
        (accuracy, mean_loss, weighted_f1, report) where `report` is the text of
        sklearn's classification report for the three classes.
    """
    class_names = ['SUPPORTS', 'REFUTES', 'NOT_ENOUGH_INFO']

    model.eval()
    losses = []
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            #token_type_ids = batch['token_type_ids'].to(device)
            labels = batch['label'].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                #token_type_ids=token_type_ids,
                labels=labels
            )

            loss = outputs.loss
            logits = outputs.logits

            _, preds = torch.max(logits, dim=1)
            losses.append(loss.item())

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')  # weighted-average F1 score
    # Pin the label ids explicitly: without `labels`, the report raises whenever one of
    # the three classes appears in neither the gold labels nor the predictions.
    report = classification_report(
        all_labels,
        all_preds,
        labels=list(range(len(class_names))),
        target_names=class_names,
    )

    return accuracy, np.mean(losses), f1, report



def _metric_to_float(value):
    """Convert a scalar metric (tensor, NumPy scalar or number) to a plain float."""
    return None if value is None else float(value)


def save_model_and_metrics(model, optimizer, epoch, train_loss, train_accuracy, val_loss, val_accuracy, val_f1, val_report, save_dir, model_name):
    """Write a checkpoint with model/optimizer state and the epoch's metrics.

    Args:
        model, optimizer: objects whose `state_dict()` is stored.
        epoch: epoch number, stored and used in the file name as given.
        train_loss, train_accuracy, val_loss, val_accuracy, val_f1: scalar metrics;
            stored as plain Python floats so the checkpoint does not embed device
            tensors or NumPy scalar objects.
        val_report: classification report text, stored unchanged.
        save_dir: target directory, created (including parents) when missing.
        model_name: file-name prefix.

    Returns:
        Path of the written file: "<save_dir>/<model_name>_epoch-<epoch>.pt".
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(save_dir, exist_ok=True)

    # Prepare the data to be saved
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': _metric_to_float(train_loss),
        'train_accuracy': _metric_to_float(train_accuracy),
        'val_loss': _metric_to_float(val_loss),
        'val_accuracy': _metric_to_float(val_accuracy),
        'val_f1': _metric_to_float(val_f1),
        'val_report': val_report
    }

    # Save the checkpoint
    model_path = os.path.join(save_dir, f"{model_name}_epoch-{epoch}.pt")
    torch.save(checkpoint, model_path)
    print(f"Model and metrics saved to {model_path}")

    return model_path


def load_model_and_metrics(model, optimizer, filepath):
    """Restore model (and optionally optimizer) state from a checkpoint and print its metrics.

    Args:
        model: model receiving `checkpoint['model_state_dict']`.
        optimizer: optimizer receiving `checkpoint['optimizer_state_dict']`, or None
            to skip restoring optimizer state (e.g. for inference only).
        filepath: path of a checkpoint written by `save_model_and_metrics`.

    Returns:
        The same `(model, optimizer)` objects that were passed in.
    """
    # Map stored tensors onto the model's current device so a checkpoint written on a
    # GPU can also be restored on a CPU-only machine.
    first_param = next(model.parameters(), None)
    map_location = first_param.device if first_param is not None else 'cpu'
    # NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
    checkpoint = torch.load(filepath, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # Print metrics
    print(f"Epoch: {checkpoint['epoch']}")
    print(f"Train Loss: {checkpoint['train_loss']:.4f}")
    print(f"Train Accuracy: {checkpoint['train_accuracy']:.4f}")
    print(f"Validation Loss: {checkpoint['val_loss']:.4f}")
    print(f"Validation Accuracy: {checkpoint['val_accuracy']:.4f}")
    print(f"Validation F1 Score: {checkpoint['val_f1']:.4f}")
    print("Validation Report:")
    print(checkpoint['val_report'])

    return model, optimizer





### BERT Model Training (hyperparameter definition, actual training, saving the model locally)

In [None]:
import matplotlib.pyplot as plt
from IPython.display import clear_output
from sklearn.metrics import classification_report, accuracy_score, f1_score


# Training loop
epochs = 10
patience = 3  # Number of epochs to wait for improvement before early stopping
best_val_acc = 0
no_improve_epoch = 0
# Defined up front so the summary at the end cannot hit an undefined name when no
# epoch ever improves on the initial best_val_acc.
best_model_path = None

# Set Optimizer and Learning Rate Scheduler
optimizer = AdamW(model.parameters(), lr=5e-6, eps=1e-8)

# The scheduler is stepped once per batch, so the schedule length is batches * epochs;
# the first 20% of the steps are warm-up.
total_steps = len(train_loader) * epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.2*total_steps),
    num_training_steps=total_steps
)

for epoch in range(epochs):
    print(f'Epoch {epoch + 1}/{epochs}')
    print('-' * 20)

    # Train the model
    train_acc, train_loss = train_epoch(
        model, train_loader, optimizer, scheduler, device
    )

    print(f'Train loss {train_loss} accuracy {train_acc}')

    # Validate the model
    val_acc, val_loss, val_f1, val_report = eval_model(
        model, val_loader, device
    )

    print(f'Val   loss {val_loss} accuracy {val_acc}')
    print(f'Val F1 Score: {val_f1}')
    print(f'Val Classification Report:\n{val_report}')

    # Dev metrics are printed for monitoring only; model selection uses validation accuracy.
    dev_acc, dev_loss, dev_f1, dev_report = eval_model(
        model, dev_loader, device
    )

    print(f'Dev accuracy {dev_acc}')
    print(f'Dev F1 Score: {dev_f1}')
    print(f'Dev Classification Report:\n{dev_report}')

    # Check if this is the best model (based on validation accuracy)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        no_improve_epoch = 0
        # Save the best model (the file name carries the zero-based epoch index)
        best_model_path = save_model_and_metrics(
            model, optimizer, epoch, train_loss, train_acc,
            val_loss, val_acc, val_f1, val_report,
            save_dir="./model_checkpoints",
            model_name=type(model).__name__
        )
        print(f"New best model saved with validation accuracy: {val_acc}")
    else:
        no_improve_epoch += 1

    # Early stopping
    if no_improve_epoch >= patience:
        print(f"Early stopping triggered. No improvement for {patience} epochs.")
        break

print("Training completed.")
print(f"Best validation accuracy: {best_val_acc}")
print(f"Best model saved at: {best_model_path}")


# Prediction 

In [27]:
from sentence_transformers import util

In [29]:
def check_array(arr):
    """Combine per-evidence class votes into one claim-level class id.

    Votes use the classifier's ids (0 = SUPPORTS, 1 = REFUTES, 2 = NOT_ENOUGH_INFO);
    the result may additionally be 3, which the prediction cell maps to DISPUTED.

    Rules:
    - both 0 and 1 present  -> 3
    - 0 present (no 1)      -> 0
    - 1 present (no 0)      -> 1
    - otherwise             -> 2, including an empty vote list, for which the
      function previously fell through and returned None.
    """
    votes = set(arr)
    has_support = 0 in votes
    has_refute = 1 in votes

    # Decision logic
    if has_support and has_refute:
        return 3
    if has_support:
        return 0
    if has_refute:
        return 1
    return 2


In [48]:
dev_data = pd.read_json(f'{data_path}/dev-claims.json',orient='index')
dev_claims = dev_data.iloc[:,0].tolist()
dev_data_pred = dev_data.copy(deep=True)

#SBERT retriever loading
SBERT_evidence_embeddings = torch.load('SBERT_evidences_embeddings.pt').to(device)
claims_embedding = sbert_embedding(dev_claims).to(device)

#DPR retriever loading
# DPR_evidence_embeddings = torch.load(f'{data_path}/dpr_evidences_embeddings.pt').to(device)
# DPR_claims_embeddings = claims_dpr_embedding(dev_claims).to(device)

#Roberta classifier model loading
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=3).to(device)
optimizer = AdamW(model.parameters(), lr=5e-6, eps=1e-8)
best_model_path = "model_checkpoints/RobertaForSequenceClassification_epoch-3.pt"
best_model, _ = load_model_and_metrics(model, optimizer, best_model_path)
# Inference only: switch to eval mode once instead of once per evidence.
best_model.eval()

# bm25,sbert or dpr (Retrievers)
method = 'sbert'
k = 4   #find the k nearest evidences

# Class id -> label text. Kept under its own name so the string -> id `label_map`
# used for training is not overwritten with the inverse mapping.
pred_label_map = {0: "SUPPORTS", 1: "REFUTES", 2: "NOT_ENOUGH_INFO", 3: "DISPUTED"}

# enumerate() gives the positional row that lines up with the claim embeddings.
for row_loc, (index, row) in enumerate(tqdm(dev_data.iterrows(), total=len(dev_data))):
    claim_text = row['claim_text']

    # --- Retrieve the k most relevant evidence positions for this claim ---
    if method == 'sbert':
        claim_embedding = claims_embedding[row_loc].unsqueeze(0)
        cosine_similarities = util.cos_sim(claim_embedding, SBERT_evidence_embeddings)

        top_k_values, top_k_indices = torch.topk(cosine_similarities, k=k, dim=1)
        top_k_indices = top_k_indices.squeeze(0).tolist()

    elif method == 'bm25':
        query_token = claim_text.split(" ")
        scores = bm25.get_scores(query_token)
        top_k_indices = np.argsort(scores)[::-1][:k].tolist()

    elif method == 'dpr':
        dpr_claim_embedding = DPR_claims_embeddings[row_loc].unsqueeze(0)
        top_k_indices, _ = find_top_n_evidence(dpr_claim_embedding, DPR_evidence_embeddings, n=k)

    else:
        # Fail loudly instead of silently reusing top_k_indices from an earlier run.
        raise ValueError(f"Unknown retriever method: {method!r} (expected 'sbert', 'bm25' or 'dpr')")

    # --- Classify every (claim, evidence) pair ---
    vote = []
    for evd_index in top_k_indices:
        inputs = tokenizer(claim_text, evidences[evd_index], padding=True, truncation=True, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = best_model(**inputs)
            logits = outputs.logits
            predictions = torch.argmax(logits, dim=-1)
            vote.append(predictions.item())

    # --- Aggregate the votes and keep the evidences that back the final label ---
    result = check_array(vote)
    if result == 2:
        # NOT_ENOUGH_INFO: report every retrieved evidence.
        evd_result = list(top_k_indices)
    elif result == 3:
        # DISPUTED: keep the evidences voted SUPPORTS or REFUTES.
        evd_result = [evd for evd, v in zip(top_k_indices, vote) if v != 2]
    elif result in (0, 1):
        # SUPPORTS / REFUTES: keep only the evidences that voted for that label.
        evd_result = [evd for evd, v in zip(top_k_indices, vote) if v == result]
    else:
        evd_result = []

    evd_result = [f'evidence-{num}' for num in evd_result]
    claim_label = pred_label_map.get(result)
    # .at writes a single cell, which is the safe way to store a list as one value.
    dev_data_pred.at[index, 'claim_label'] = claim_label
    dev_data_pred.at[index, 'evidences'] = evd_result
print(f'prediction completed on : {method}')


  SBERT_evidence_embeddings = torch.load('SBERT_evidences_embeddings.pt').to(device)


Calculating embeddings in batches: 100%|██████████| 2/2 [00:00<00:00, 32.05it/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  checkpoint = torch.load(filepath)


Epoch: 3
Train Loss: 0.5159
Train Accuracy: 0.8098
Validation Loss: 0.8772
Validation Accuracy: 0.6528
Validation F1 Score: 0.6499
Validation Report:
                 precision    recall  f1-score   support

       SUPPORTS       0.72      0.64      0.68       140
        REFUTES       0.53      0.43      0.48        56
NOT_ENOUGH_INFO       0.64      0.73      0.68       190

       accuracy                           0.65       386
      macro avg       0.63      0.60      0.61       386
   weighted avg       0.65      0.65      0.65       386



3it [00:00, 22.91it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


9it [00:00, 23.22it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


12it [00:00, 23.18it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


15it [00:00, 23.47it/s]

prediction completed on : sbert


18it [00:00, 23.46it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


24it [00:01, 23.65it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


27it [00:01, 23.54it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


30it [00:01, 23.55it/s]

prediction completed on : sbert


33it [00:01, 23.65it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


39it [00:01, 23.63it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


42it [00:01, 23.55it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


45it [00:01, 23.59it/s]

prediction completed on : sbert


48it [00:02, 23.62it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


54it [00:02, 23.65it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


57it [00:02, 23.65it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


60it [00:02, 23.64it/s]

prediction completed on : sbert


63it [00:02, 23.56it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


69it [00:02, 23.71it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


72it [00:03, 23.64it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


75it [00:03, 23.63it/s]

prediction completed on : sbert


78it [00:03, 23.74it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


84it [00:03, 23.74it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


87it [00:03, 23.69it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


90it [00:03, 23.67it/s]

prediction completed on : sbert


93it [00:03, 23.65it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


99it [00:04, 23.85it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


102it [00:04, 23.87it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


105it [00:04, 23.80it/s]

prediction completed on : sbert


108it [00:04, 23.86it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


114it [00:04, 23.94it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


117it [00:04, 23.81it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


120it [00:05, 23.82it/s]

prediction completed on : sbert


123it [00:05, 23.91it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


129it [00:05, 23.93it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


132it [00:05, 23.87it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


135it [00:05, 23.90it/s]

prediction completed on : sbert


138it [00:05, 23.83it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


144it [00:06, 23.92it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


147it [00:06, 23.81it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert


150it [00:06, 23.82it/s]

prediction completed on : sbert


154it [00:06, 23.70it/s]

prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert
prediction completed on : sbert





In [47]:
# Name the file after the retriever and k actually used by the prediction cell, so the
# saved predictions cannot be mislabelled (the name was hard-coded to "dpr ... k_5").
dev_data_pred.to_json(f'{data_path}/dev-pred-{method}_roberta_k_{k}.json',orient='index')