# 5.3 Classifier BERT

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Read in Data

In [2]:
import json
import numpy as np
import pickle
import gc

In [3]:
with open ('./drive/My Drive/LAB/COMP90042 A3/data/curated/train_claims2.json') as f:
    train_claims = json.load(f)

In [4]:
with open ('./drive/My Drive/LAB/COMP90042 A3/data/raw/dev-claims.json') as f:
    dev_claims = json.load(f)

In [5]:
with open ('./drive/My Drive/LAB/COMP90042 A3/data/curated/test_claims2.json') as f:
    test_claims = json.load(f)

In [6]:
with open ('./drive/My Drive/LAB/COMP90042 A3/data/raw/evidence.json') as f:
    evidence = json.load(f)

In [7]:
with open ('./drive/My Drive/LAB/COMP90042 A3/data/raw/test-claims-unlabelled.json') as f:
    future_claims = json.load(f)

In [8]:
import random
random.seed(19260817)

In [9]:
evid_id_list = [evid_id for evid_id in evidence]

### Create dataset used to train retriever

In [10]:
ENCODING = {'REFUTES': 0, 'DISPUTED': 1, 'NOT_ENOUGH_INFO': 2, 'SUPPORTS': 3}
DECODING = {ENCODING[key]:key for key in ENCODING}

In [11]:
training_data = []

for id in train_claims:

  claim_text = train_claims[id]['claim_text']

  label = ENCODING[train_claims[id]['claim_label']] # LABELS MUST BE ENCODED!!!!!!

  n_evid = len(train_claims[id]['evidences'])
  
  for evid_id in train_claims[id]['evidences']:
    evid_text = evidence[evid_id]

    training_data.append(((claim_text, evid_text), label))

In [12]:
dev_data = []

for id in dev_claims:

  claim_text = dev_claims[id]['claim_text']

  label = ENCODING[dev_claims[id]['claim_label']] # LABELS MUST BE ENCODED!!!!!!

  n_evid = len(dev_claims[id]['evidences'])
  
  for evid_id in dev_claims[id]['evidences']:
    evid_text = evidence[evid_id]
    
    dev_data.append(((claim_text, evid_text), label))

In [13]:
test_data = []

for id in test_claims:

  claim_text = test_claims[id]['claim_text']

  label = ENCODING[test_claims[id]['claim_label']] # LABELS MUST BE ENCODED!!!!!!

  n_evid = len(test_claims[id]['evidences'])
  
  for evid_id in test_claims[id]['evidences']:
    evid_text = evidence[evid_id]
    
    test_data.append(((claim_text, evid_text), label))

In [14]:
scientific_claims_id = set()
for claim in train_claims:
  for evid in train_claims[claim]['evidences']:
    scientific_claims_id.add(evid)

for claim in dev_claims:
  for evid in dev_claims[claim]['evidences']:
    scientific_claims_id.add(evid)

for claim in test_claims:
  for evid in test_claims[claim]['evidences']:
    scientific_claims_id.add(evid)


scientific_claims_id = list(scientific_claims_id)

In [15]:
!pip install torch torchvision transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m78.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0 (from transformers)
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m108.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.28.1


## Build model, dataloader etc

In [16]:
from transformers import BertModel

In [18]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

In [19]:
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer
import pandas as pd

class Dataset():

    def __init__(self, data, maxlen):

        #Store the contents of the file in a pandas dataframe
        self.data = data

        #Initialize the BERT tokenizer
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

        self.maxlen = maxlen

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):

        #Selecting the sentence and label at the specified index in the data frame
        sentence1 = self.data[index][0][0]
        sentence2 = self.data[index][0][1]
        label = self.data[index][1]

        #Preprocessing the text to be suitable for BERT
        tokens1 = tokenizer.tokenize(sentence1)
        tokens2 = tokenizer.tokenize(sentence2)
        tokens1 = ['[CLS]'] + tokens1 + ['[SEP]']
        tokens2 = tokens2 + ['[SEP]']
        tokens = tokens1 + tokens2 #Insering the CLS and SEP token in the beginning and end of the sentence
        if len(tokens) < self.maxlen:
            tokens = tokens + ['[PAD]' for _ in range(self.maxlen - len(tokens))] #Padding sentences
        else:
            tokens = tokens[:self.maxlen-1] + ['[SEP]'] #Prunning the list to be of specified max length

        tokens_ids = self.tokenizer.convert_tokens_to_ids(tokens) #Obtaining the indices of the tokens in the BERT Vocabulary
        tokens_ids_tensor = torch.tensor(tokens_ids) #Converting the list to a pytorch tensor

        #Obtaining the attention mask i.e a tensor containing 1s for no padded tokens and 0s for padded ones
        attn_mask = (tokens_ids_tensor != 0).long()

        seg_ids = [0 for _ in range(len(tokens1))]
        seg_ids2 = [1 for _ in range(self.maxlen-len(tokens1))]
        seg_ids.extend(seg_ids2)

        seg_ids_tensor = torch.tensor(seg_ids)


        return tokens_ids_tensor, attn_mask, seg_ids_tensor, label

In [20]:
from torch.utils.data import DataLoader

#Creating instances of training and development set
#maxlen sets the maximum length a sentence can have
#any sentence longer than this length is truncated to the maxlen size
train_set = Dataset(training_data, maxlen = 512)
dev_set = Dataset(dev_data, maxlen = 512)
test_set = Dataset(test_data, maxlen = 512)

#Creating intsances of training and development dataloaders
train_loader = DataLoader(train_set, batch_size = 16, shuffle = True, num_workers = 2)
dev_loader = DataLoader(dev_set, batch_size = 16, shuffle = True, num_workers = 2)
test_loader = DataLoader(test_set, batch_size = 16, shuffle = True, num_workers = 2)

In [21]:
import torch
import torch.nn as nn
from transformers import BertModel

class nEvidClassifier(nn.Module):

    def __init__(self):
        super(nEvidClassifier, self).__init__()
        #Instantiating BERT model object 
        self.bert_layer = BertModel.from_pretrained('bert-base-uncased')
        
        #Classification layer
        #input dimension is 768 because [CLS] embedding has a dimension of 768
        #output dimension is 1 because we're working with a binary classification problem
        self.cls_layer = nn.Linear(768, 30)

        self.cls_layer2 = nn.Linear(30, 4)

        self.softmax_layer = nn.Softmax(dim=4)

    def forward(self, seq, attn_masks, seg_ids):
        '''
        Inputs:
            -seq : Tensor of shape [B, T] containing token ids of sequences
            -attn_masks : Tensor of shape [B, T] containing attention masks to be used to avoid contibution of PAD tokens
        '''

        #Feeding the input to BERT model to obtain contextualized representations
        outputs = self.bert_layer(seq, attention_mask = attn_masks, token_type_ids = seg_ids, return_dict=True)
        cont_reps = outputs.last_hidden_state

        #Obtaining the representation of [CLS] head (the first token)
        cls_rep = cont_reps[:, 0]

        #Feeding cls_rep to the classifier layer
        output = self.cls_layer(cls_rep)

        output = self.cls_layer2(output)

        return output

In [22]:
gpu = 0 #gpu ID

print("Creating the sentiment regressor, initialised with pretrained BERT-BASE parameters...")
net = nEvidClassifier()
net.cuda(gpu) #Enable gpu support for the model
print("Done creating the sentiment regressor.")

Creating the sentiment regressor, initialised with pretrained BERT-BASE parameters...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Done creating the sentiment regressor.


### Setup Training

In [None]:
import torch.nn as nn
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
opti = optim.Adam(net.parameters(), lr = 2e-5)

In [None]:
def get_accuracy_from_logits(pseudo_probs, labels):

    correct = 0
    total = 0
    for i in range(len(labels)):
        _, predicted = torch.max(pseudo_probs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    return correct/total

    # index = torch.argmax(pseudo_probs.unsqueeze(-1))
    # soft_probs = (probs > 0.5).long()
    # acc = (soft_probs.squeeze() == labels).float().mean()
    # return acc
    

def evaluate(net, criterion, dataloader, gpu):
    net.eval()

    mean_acc, mean_loss = 0, 0
    count = 0
    acc = 0

    with torch.no_grad():
        for seq, attn_masks, seg_ids_tensor, labels in dataloader:
            seq, attn_masks, seg_ids_tensor, labels = seq.cuda(gpu), attn_masks.cuda(gpu), seg_ids_tensor.cuda(gpu), labels.cuda(gpu)
            output = net(seq, attn_masks, seg_ids_tensor)
            _, predicted = torch.max(output.data, 1)
            acc += (predicted == labels).sum().item() / len(labels)
            mean_loss += criterion(output, labels).item()
            count += 1

    return acc / count, mean_loss / count

In [None]:
import time

def train(net, criterion, opti, train_loader, dev_loader, max_eps, gpu):

    best_acc = 0
    st = time.time()
    for ep in range(max_eps):
        
        net.train()
        for it, (seq, attn_masks, seg_ids_tensor, labels) in enumerate(train_loader):
            #Clear gradients
            opti.zero_grad()  
            #Converting these to cuda tensors
            seq, attn_masks, seg_ids_tensor, labels = seq.cuda(gpu), attn_masks.cuda(gpu), seg_ids_tensor.cuda(gpu), labels.cuda(gpu)

            #Obtaining the logits from the model
            output = net(seq, attn_masks, seg_ids_tensor)

            #Computing loss
            loss = criterion(output, labels)

            #Backpropagating the gradients
            loss.backward()

            #Optimization step
            opti.step()
              
            if it % 100 == 0:
                
                _, predicted = torch.max(output.data, 1)
                acc = (predicted == labels).sum().item() / len(labels)
                print("Iteration {} of epoch {} complete. Loss: {}; Accuracy: {}; Time taken (s): {}".format(it, ep, loss.item(), acc, (time.time()-st)))
                st = time.time()

        
        dev_acc, dev_loss = evaluate(net, criterion, dev_loader, gpu)
        print("Epoch {} complete! Development Accuracy: {}; Development Loss: {}".format(ep, dev_acc, dev_loss))
        if dev_acc > best_acc:
            print("Best development accuracy improved from {} to {}, saving model...".format(best_acc, dev_acc))
            best_acc = dev_acc
            torch.save(net.state_dict(), './drive/My Drive/LAB/COMP90042 A3/models/Classifiers/Classifier_Bert_2.dat')
            torch.save(net, './drive/My Drive/LAB/COMP90042 A3/models/Classifiers/Classifier_Bert_2.pt')

In [None]:
torch.cuda.empty_cache() 

In [None]:
num_epoch = 5

#fine-tune the model
train(net, criterion, opti, train_loader, dev_loader, num_epoch, gpu)

Iteration 0 of epoch 0 complete. Loss: 0.0645977333188057; Accuracy: 1.0; Time taken (s): 2.558573007583618
Iteration 100 of epoch 0 complete. Loss: 0.01187062542885542; Accuracy: 1.0; Time taken (s): 144.55616903305054
Iteration 200 of epoch 0 complete. Loss: 0.015184145420789719; Accuracy: 1.0; Time taken (s): 146.93218541145325
Epoch 0 complete! Development Accuracy: 0.5249266862170088; Development Loss: 2.312257424477608
Best development accuracy improved from 0 to 0.5249266862170088, saving model...
Iteration 0 of epoch 1 complete. Loss: 0.020845942199230194; Accuracy: 1.0; Time taken (s): 68.69047808647156
Iteration 100 of epoch 1 complete. Loss: 0.1319212168455124; Accuracy: 0.9375; Time taken (s): 147.49619603157043
Iteration 200 of epoch 1 complete. Loss: 0.01075851172208786; Accuracy: 1.0; Time taken (s): 147.06881642341614
Epoch 1 complete! Development Accuracy: 0.5399560117302052; Development Loss: 2.32033041215712
Best development accuracy improved from 0.5249266862170088 

## Read In Model

In [None]:
net.load_state_dict(torch.load('./drive/My Drive/LAB/COMP90042 A3/models/Classifiers/Classifier_Bert_2.dat'))

<All keys matched successfully>

In [None]:
net = torch.load('./drive/My Drive/LAB/COMP90042 A3/models/Classifiers/Classifier_Bert_2.pt')

In [None]:
net.eval()

nEvidClassifier(
  (bert_layer): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

## Make Predictions (make Classifications)

In [31]:
class PredictDataset():

    def __init__(self, data, maxlen):

        #Store the contents of the file in a pandas dataframe
        self.data = data

        #Initialize the BERT tokenizer
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

        self.maxlen = maxlen

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):

        #Selecting the sentence and label at the specified index in the data frame
        sentence1 = self.data[index][0]
        sentence2 = self.data[index][1]

        #Preprocessing the text to be suitable for BERT
        tokens1 = tokenizer.tokenize(sentence1)
        tokens2 = tokenizer.tokenize(sentence2)
        tokens1 = ['[CLS]'] + tokens1 + ['[SEP]']
        tokens2 = tokens2 + ['[SEP]']
        tokens = tokens1 + tokens2 #Insering the CLS and SEP token in the beginning and end of the sentence
        if len(tokens) < self.maxlen:
            tokens = tokens + ['[PAD]' for _ in range(self.maxlen - len(tokens))] #Padding sentences
        else:
            tokens = tokens[:self.maxlen-1] + ['[SEP]'] #Prunning the list to be of specified max length

        tokens_ids = self.tokenizer.convert_tokens_to_ids(tokens) #Obtaining the indices of the tokens in the BERT Vocabulary
        tokens_ids_tensor = torch.tensor(tokens_ids) #Converting the list to a pytorch tensor

        #Obtaining the attention mask i.e a tensor containing 1s for no padded tokens and 0s for padded ones
        attn_mask = (tokens_ids_tensor != 0).long()

        seg_ids = [0 for _ in range(len(tokens1))]
        seg_ids2 = [1 for _ in range(self.maxlen-len(tokens1))]
        seg_ids.extend(seg_ids2)

        seg_ids_tensor = torch.tensor(seg_ids)


        return tokens_ids_tensor, attn_mask, seg_ids_tensor

In [28]:
def get_classif(claims, file_name, SIZE):


  try:
    with open(f'./drive/My Drive/LAB/COMP90042 A3/predictions/Retrievals/{file_name}.pickle', 'rb') as f:
      predictions = pickle.load(f)
  except:
      predictions = {}


  i = 0
  for id in claims:
    if id in predictions:
      print('pass:', id)
      continue

    print(id)

    claim_text = claims[id]['claim_text']
    
    data_for_predict = list()

    for evid_id in claims[id]['evidences']:
      evid_text = evidence[evid_id]
      
      data_for_predict.append((claim_text, evid_text))

    set_for_predict = PredictDataset(data_for_predict, maxlen = 512)

    predict_loader = DataLoader(set_for_predict, batch_size = SIZE, num_workers = 2)
    
    predicted_logit = list()
    with torch.no_grad():
      for it, (seq, attn_masks, seg_ids_tensor) in enumerate(predict_loader):
        
        torch.cuda.empty_cache()
        gc.collect()
        seq, attn_masks, seg_ids_tensor = seq.cuda(gpu), attn_masks.cuda(gpu), seg_ids_tensor.cuda(gpu)

        logits = net(seq, attn_masks, seg_ids_tensor).tolist()
      
      predictions[id] = logits

      with open(f'./drive/My Drive/LAB/COMP90042 A3/predictions/Classifications/{file_name}.pickle', 'wb') as f:
          pickle.dump(predictions, f)

In [29]:
def get_classif_from_pred_retrievals(claims, predicted_evidence, file_name, SIZE):


  try:
    with open(f'./drive/My Drive/LAB/COMP90042 A3/predictions/Retrievals/{file_name}.pickle', 'rb') as f:
      predictions = pickle.load(f)
  except:
      predictions = {}


  i = 0
  for id in claims:
    if id in predictions:
      print('pass:', id)
      continue

    print(id)

    claim_text = claims[id]['claim_text']
    
    data_for_predict = list()

    for evid_id in predicted_evidence[id]:
      evid_text = evidence[evid_id]
      
      data_for_predict.append((claim_text, evid_text))

    set_for_predict = PredictDataset(data_for_predict, maxlen = 512)

    predict_loader = DataLoader(set_for_predict, batch_size = SIZE, num_workers = 2)
    
    predicted_logit = list()
    with torch.no_grad():
      for it, (seq, attn_masks, seg_ids_tensor) in enumerate(predict_loader):
        
        torch.cuda.empty_cache()
        gc.collect()
        seq, attn_masks, seg_ids_tensor = seq.cuda(gpu), attn_masks.cuda(gpu), seg_ids_tensor.cuda(gpu)

        logits = net(seq, attn_masks, seg_ids_tensor).tolist()
      
      predictions[id] = logits

      with open(f'./drive/My Drive/LAB/COMP90042 A3/predictions/Classifications/{file_name}.pickle', 'wb') as f:
          pickle.dump(predictions, f)

In [46]:
CLASS_MODEL_A = 'BERT2'

Classifications from Ground Truth

In [47]:
get_classif(dev_claims, f'Classification_{CLASS_MODEL_A}_dev', SIZE=32)

claim-752
claim-375
claim-1266
claim-871
claim-2164
claim-1607
claim-761
claim-1718
claim-1273
claim-1786
claim-2796
claim-2580
claim-1219
claim-75
claim-2813
claim-2335
claim-161
claim-2243
claim-1256
claim-506
claim-369
claim-2184
claim-1057
claim-104
claim-1975
claim-139
claim-2062
claim-1160
claim-2679
claim-2662
claim-1490
claim-2768
claim-2168
claim-785
claim-2426
claim-1292
claim-993
claim-2593
claim-1567
claim-1834
claim-856
claim-540
claim-757
claim-1407
claim-3070
claim-1745
claim-1515
claim-1519
claim-3069
claim-677
claim-765
claim-2275
claim-1113
claim-2611
claim-2060
claim-2326
claim-1087
claim-2867
claim-2300
claim-2250
claim-2429
claim-3051
claim-1549
claim-261
claim-2230
claim-2579
claim-1416
claim-2497
claim-811
claim-1896
claim-2819
claim-2643
claim-1775
claim-316
claim-896
claim-331
claim-2574
claim-342
claim-2034
claim-578
claim-976
claim-1097
claim-609
claim-173
claim-1222
claim-2441
claim-756
claim-2577
claim-2890
claim-2478
claim-2399
claim-3091
claim-141
claim-1

In [None]:
get_classif(test_claims, f'Classification_{CLASS_MODEL_A}_test', SIZE=32)

In [39]:
CLASS_MODEL = "BERT2"
OPEN_PREDICTION = "SBERTspecial"
RETRIEV_MODEL = "sberts"
K = 5

In [40]:
with open(f'./drive/My Drive/LAB/COMP90042 A3/predictions/Retrievals/Retrieval_{OPEN_PREDICTION}_dev.pickle', 'rb') as f:
    dev_predicted_evidence = pickle.load(f)

dev_claims_pred = {}

for claim in dev_predicted_evidence:
  # evid_list = [x[0] for x in dev_predicted_evidence[claim]['evidences']]
  evid_list = [x[0] for x in dev_predicted_evidence[claim]]

  dev_claims_pred[claim] = evid_list[:K]

In [41]:
with open(f'./drive/My Drive/LAB/COMP90042 A3/predictions/Retrievals/Retrieval_{OPEN_PREDICTION}_test.pickle', 'rb') as f:
    test_predicted_evidence = pickle.load(f)

test_claims_pred = {}

for claim in test_predicted_evidence:
  # evid_list = [x[0] for x in test_predicted_evidence[claim]['evidences']]
  evid_list = [x[0] for x in test_predicted_evidence[claim]]

  test_claims_pred[claim] = evid_list[:K]

In [42]:
with open(f'./drive/My Drive/LAB/COMP90042 A3/predictions/Retrievals/Retrieval_{OPEN_PREDICTION}_future.pickle', 'rb') as f:
    future_predicted_evidence = pickle.load(f)

future_claims_pred = {}

for claim in future_predicted_evidence:
  # evid_list = [x[0] for x in future_predicted_evidence[claim]['evidences']]
  evid_list = [x[0] for x in future_predicted_evidence[claim]]

  future_claims_pred[claim] = evid_list[:K]

Classifications from Predicted Evidence

In [43]:
get_classif_from_pred_retrievals(dev_claims, dev_claims_pred, f'Classification_{CLASS_MODEL}_{RETRIEV_MODEL}{K}_devp', SIZE=32)

claim-752
claim-375
claim-1266
claim-871
claim-2164
claim-1607
claim-761
claim-1718
claim-1273
claim-1786
claim-2796
claim-2580
claim-1219
claim-75
claim-2813
claim-2335
claim-161
claim-2243
claim-1256
claim-506
claim-369
claim-2184
claim-1057
claim-104
claim-1975
claim-139
claim-2062
claim-1160
claim-2679
claim-2662
claim-1490
claim-2768
claim-2168
claim-785
claim-2426
claim-1292
claim-993
claim-2593
claim-1567
claim-1834
claim-856
claim-540
claim-757
claim-1407
claim-3070
claim-1745
claim-1515
claim-1519
claim-3069
claim-677
claim-765
claim-2275
claim-1113
claim-2611
claim-2060
claim-2326
claim-1087
claim-2867
claim-2300
claim-2250
claim-2429
claim-3051
claim-1549
claim-261
claim-2230
claim-2579
claim-1416
claim-2497
claim-811
claim-1896
claim-2819
claim-2643
claim-1775
claim-316
claim-896
claim-331
claim-2574
claim-342
claim-2034
claim-578
claim-976
claim-1097
claim-609
claim-173
claim-1222
claim-2441
claim-756
claim-2577
claim-2890
claim-2478
claim-2399
claim-3091
claim-141
claim-1

In [44]:
get_classif_from_pred_retrievals(test_claims, test_claims_pred, f'Classification_{CLASS_MODEL}_{RETRIEV_MODEL}{K}_testp', SIZE=32)

claim-1898
claim-2276
claim-564
claim-3003
claim-2173
claim-1818
claim-2903
claim-1362
claim-2726
claim-1466
claim-2040
claim-311
claim-1855
claim-72
claim-840
claim-1075
claim-2374
claim-2305
claim-904
claim-1276
claim-447
claim-1673
claim-2181
claim-1360
claim-2901
claim-586
claim-788
claim-3009
claim-2837
claim-1553
claim-1649
claim-2682
claim-1719
claim-787
claim-2430
claim-3062
claim-1286
claim-1465
claim-1067
claim-2745
claim-2720
claim-2032
claim-1991
claim-920
claim-1421
claim-1555
claim-2358
claim-1565
claim-582
claim-1399
claim-555
claim-1923
claim-1658
claim-512
claim-248
claim-1980
claim-1492
claim-948
claim-2912
claim-2004
claim-1717
claim-995
claim-3079
claim-2068
claim-1817
claim-2223
claim-1825
claim-2009
claim-2542
claim-508
claim-189
claim-44
claim-1376
claim-939
claim-1357
claim-849
claim-418
claim-2272
claim-1983
claim-1504
claim-1626
claim-1510
claim-1463
claim-666
claim-1434
claim-2694
claim-1678
claim-1462
claim-1871
claim-2312
claim-337
claim-2214
claim-962
clai

In [45]:
get_classif_from_pred_retrievals(future_claims, future_claims_pred, f'Classification_{CLASS_MODEL}_{RETRIEV_MODEL}{K}_futurep', SIZE=32)

claim-2967
claim-979
claim-1609
claim-1020
claim-2599
claim-2110
claim-1135
claim-712
claim-1307
claim-148
claim-903
claim-2942
claim-1001
claim-1034
claim-1009
claim-770
claim-3074
claim-1761
claim-1475
claim-477
claim-1378
claim-503
claim-2751
claim-2575
claim-30
claim-2994
claim-55
claim-1271
claim-2248
claim-532
claim-556
claim-1173
claim-539
claim-893
claim-2857
claim-109
claim-2476
claim-3038
claim-3127
claim-474
claim-2464
claim-2427
claim-2167
claim-812
claim-2590
claim-404
claim-2977
claim-2673
claim-2509
claim-138
claim-952
claim-1691
claim-1741
claim-1202
claim-1028
claim-28
claim-275
claim-350
claim-2204
claim-1604
claim-3119
claim-2150
claim-21
claim-2013
claim-467
claim-2754
claim-2797
claim-1771
claim-1908
claim-2000
claim-2084
claim-1237
claim-400
claim-1508
claim-520
claim-3064
claim-1588
claim-1488
claim-2733
claim-809
claim-763
claim-454
claim-1853
claim-2838
claim-2028
claim-2434
claim-298
claim-338
claim-1672
claim-2840
claim-1425
claim-1985
claim-1156
claim-2870
c