In [19]:
import json
import errno
import os
import torch.nn as nn
import torch
import os
import json
import pickle as pkl
import numpy as np
import torch
from tqdm import tqdm
import random
import pandas as pd
import h5py
import os
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score
from transformers import get_linear_schedule_with_warmup,AdamW

def load_pkl(path):
    data=pkl.load(open(path,'rb'))
    return data

def read_json(path):
    # utils.assert_exits(path)
    data=json.load(open(path,'rb'))
    '''in anet-qa returns a list'''
    return data
def bce_for_loss(logits,labels):
    loss=nn.functional.binary_cross_entropy_with_logits(logits, labels)
    loss*=labels.size(1)
    return loss

def compute_auc_score(logits,label):
    bz=logits.shape[0]
    logits=logits.cpu().numpy()
    label=label.cpu().numpy()
    auc=roc_auc_score(label,logits,average='weighted')*bz
    return auc

def compute_score(logits,labels):
    #print (logits,logits.shape)
    logits=torch.max(logits,1)[1]
    #print (logits)
    one_hot=torch.zeros(*labels.size()).cuda()
    one_hot.scatter_(1,logits.view(-1,1),1)
    score=one_hot * labels
    return score.sum().float()

def compute_scaler_score(logits,labels):
    #print (logits,logits.shape)
    logits=torch.max(logits,1)[1]
    labels=labels.squeeze(-1)
    score=(logits==labels).int()
    #print (score.sum(),labels,logits)
    return score.sum().float()


def log_hyperpara(logger,opt):
    dic = vars(opt)
    for k,v in dic.items():
        logger.write(k + ' : ' + str(v))


In [20]:
class Multimodal_Data():
    #mem, off, harm
    def __init__(self,opt,tokenizer,dataset,mode='train',few_shot_index=0):
        super(Multimodal_Data,self).__init__()
        self.opt=opt
        self.tokenizer = tokenizer
        self.mode=mode
        if self.opt.FEW_SHOT:
            self.few_shot_index=str(few_shot_index)
            self.num_shots=self.opt.NUM_SHOTS
            print ('Few shot learning setting for Iteration:',self.few_shot_index)
            print ('Number of shots:',self.num_shots)
        self.image_folder=self.opt.IMAGE_FOLDER
        self.num_ans=self.opt.NUM_LABELS
        #maximum length for a single sentence
        self.length=self.opt.LENGTH
        #maximum length of the concatenation of sentences
        self.total_length=self.opt.TOTAL_LENGTH
        self.num_sample=self.opt.NUM_SAMPLE
        self.add_ent=self.opt.ADD_ENT
        self.add_dem=self.opt.ADD_DEM
        print ('Adding exntity information?',self.add_ent)
        print ('Adding demographic information?',self.add_dem)
        self.fine_grind=self.opt.FINE_GRIND
        print ('Using target information?',self.fine_grind)

        if opt.FINE_GRIND:
            #target information
            if self.opt.DATASET=='mem':
                self.label_mapping_word={0:'nobody',
                                         1:'race',
                                         2:'disability',
                                         3:'nationality',
                                         4:'sex',
                                         5:'religion'}
            elif self.opt.DATASET=='harm':
                self.label_mapping_word={0:'nobody',
                                         1:'society',
                                         2:'individual',
                                         3:'community',
                                         4:'organization'}
                self.attack_list={'society':0,
                                  'individual':1,
                                  'community':2,
                                  'organization':3}
                self.attack_file=load_pkl(os.path.join(self.opt.DATA,
                                                       'domain_splits','harm_trgt.pkl'))
            self.template="*<s>**sent_0*.*_It_was_targeting*label_**</s>*"
        else:
            self.label_mapping_word={0:self.opt.POS_WORD,
                                     1:self.opt.NEG_WORD}
            self.template="*<s>**sent_0*.*_It_was*label_**</s>*"

        self.label_mapping_id={}
        for label in self.label_mapping_word.keys():
            mapping_word=self.label_mapping_word[label]
            #add space already
            assert len(tokenizer.tokenize(' ' + self.label_mapping_word[label])) == 1
            self.label_mapping_id[label] = \
            tokenizer._convert_token_to_id(
                tokenizer.tokenize(' ' + self.label_mapping_word[label])[0])
            print ('Mapping for label %d, word %s, index %d' %
                   (label,mapping_word,self.label_mapping_id[label]))
        #implementation for one template now


        self.template_list=self.template.split('*')
        print('Template:', self.template)
        print('Template list:',self.template_list)
        self.special_token_mapping = {
            '<s>': tokenizer.convert_tokens_to_ids('<s>'),
            '<mask>': tokenizer.mask_token_id,
            '<pad>': tokenizer.pad_token_id, #1 for roberta
            '</s>': tokenizer.convert_tokens_to_ids('<\s>')
        }

        if self.opt.DEM_SAMP:
            print ('Using demonstration sampling strategy...')
            self.img_rate=self.opt.IMG_RATE
            self.text_rate=self.opt.TEXT_RATE
            self.samp_rate=self.opt.SIM_RATE
            print ('Image rage for measuring CLIP similarity:',self.img_rate)
            print ('Text rage for measuring CLIP similarity:',self.text_rate)
            print ('Sampling from top:',self.samp_rate*100.0,'examples')
            self.clip_clean=self.opt.CLIP_CLEAN
            clip_path=os.path.join(
                self.opt.CAPTION_PATH,
                dataset,dataset+'_sim_scores.pkl')
            print ('Clip feature path:',clip_path)
            self.clip_feature=load_pkl(clip_path)

        self.support_examples=self.load_entries('train')
        print ('Length of supporting example:',len(self.support_examples))
        self.entries=self.load_entries(mode)
        if self.opt.DEBUG:
            self.entries=self.entries[:128]
        self.prepare_exp()
        print ('The length of the dataset for:',mode,'is:',len(self.entries))

    def load_entries(self,mode):
        #print ('Loading data from:',self.dataset)
        #only in training mode, in few-shot setting the loading will be different
        if self.opt.FEW_SHOT and mode=='train':
            path=os.path.join(self.opt.DATA,
                              'domain-splits',
                              self.opt.DATASET+'_'+str(self.num_shots)+'_'+self.few_shot_index+'.json')
        else:
            path=os.path.join(self.opt.DATA,
                              'domain-splits',
                              self.opt.DATASET+'_'+mode+'.json')
        data=read_json(path)
        cap_path=os.path.join(self.opt.CAPTION_PATH,
                              self.opt.DATASET+'-'+self.opt.PRETRAIN_DATA,
                              self.opt.IMG_VERSION+'_captions.pkl')
        captions=load_pkl(cap_path)
        entries=[]

        for k,row in enumerate(data):
            label=row['label']
            img=row['img']
            # img = os.path.join(self.image_folder, row['img'])
            # print(img)
            cap=captions[img.split('.')[0]][:-1]#remove the punctuation in the end
            sent=row['clean_sent']
            #remember the punctuations at the end of each sentence
            cap=cap+' . '+sent+' . '
            #whether using external knowledge
            if self.add_ent:
                cap=cap+' . '+row['entity']+' . '
            if self.add_dem:
                cap=cap+' . '+row['race']+' . '
            entry={
                'cap':cap.strip(),
                'label':label,
                'img':img
            }
            if self.fine_grind:
                if self.opt.DATASET=='mem':
                    if label==0:
                        #[1,0,0,0,0,0]
                        entry['attack']=[1]+row['attack']
                    else:
                        entry['attack']=[0]+row['attack']
                elif self.opt.DATASET=='harm':
                    if label==0:
                        #[1,0,0,0,0,0]
                        entry['attack']=[1,0,0,0,0]
                    else:
                        attack=[0,0,0,0,0]
                        attack_idx=self.attack_list[self.attack_file[img]]+1
                        attack[attack_idx]=1
                        entry['attack']=attack
            entries.append(entry)
        return entries

    def enc(self,text):
        return self.tokenizer.encode(text, add_special_tokens=False)

    def prepare_exp(self):
        ###add sampling
        support_indices = list(range(len(self.support_examples)))
        self.example_idx = []
        for sample_idx in tqdm(range(self.num_sample)):
            for query_idx in range(len(self.entries)):
                if self.opt.DEM_SAMP:
                    #filter dissimilar demonstrations
                    candidates= [support_idx for support_idx in support_indices
                                 if support_idx != query_idx or self.mode != "train"]
                    sim_score=[]
                    count_each_label = {label: 0 for label in range(self.opt.NUM_LABELS)}
                    context_indices=[]
                    clip_info_que=self.clip_feature[self.entries[query_idx]['img']]

                    #similarity computation
                    for support_idx in candidates:
                        img=self.support_examples[support_idx]['img']
                        #this cost a lot of computation
                        #unnormalized: the same scale -- 512 dimension
                        if self.clip_clean:
                            img_sim=clip_info_que['clean_img'][img]
                        else:
                            img_sim=clip_info_que['img'][img]
                        text_sim=clip_info_que['text'][img]
                        total_sim=self.img_rate*img_sim+self.text_rate*text_sim
                        sim_score.append((support_idx,total_sim))
                    sim_score.sort(key=lambda x: x[1],reverse=True)

                    #top opt.SIM_RATE entities for each label
                    num_valid=int(len(sim_score)//self.opt.NUM_LABELS*self.samp_rate)
                    """
                    if self.opt.DEBUG:
                        print ('Valid for each class:',num_valid)
                    """

                    for support_idx, score in sim_score:
                        cur_label=self.support_examples[support_idx]['label']
                        if count_each_label[cur_label]<num_valid:
                            count_each_label[cur_label]+=1
                            context_indices.append(support_idx)
                else:
                    #exclude the current example during training
                    context_indices = [support_idx for support_idx in support_indices
                                       if support_idx != query_idx or self.mode != "train"]
                #available indexes for supporting examples
                self.example_idx.append((query_idx, context_indices, sample_idx))

    def select_context(self, context_examples):
        """
        Select demonstrations from provided examples.
        """
        num_labels=self.opt.NUM_LABELS
        max_demo_per_label = 1
        counts = {k: 0 for k in range(num_labels)}
        if num_labels == 1:
            # Regression
            counts = {'0': 0, '1': 0}
        selection = []
        """
        # Sampling strategy from LM-BFF
        if self.opt.DEBUG:
            print ('Number of context examples available:',len(context_examples))
        """
        order = np.random.permutation(len(context_examples))
        for i in order:
            label = context_examples[i]['label']
            if num_labels == 1:
                # Regression
                #No implementation currently
                label = '0' if\
                float(label) <= median_mapping[self.args.task_name] else '1'
            if counts[label] < max_demo_per_label:
                selection.append(context_examples[i])
                counts[label] += 1
            if sum(counts.values()) == len(counts) * max_demo_per_label:
                break

        assert len(selection) > 0
        return selection

    def process_prompt(self, examples,
                       first_sent_limit, other_sent_limit):
        if self.fine_grind:
            prompt_arch=' It was targeting '
        else:
            prompt_arch=' It was '
        #currently, first and other limit are the same
        input_ids = []
        attention_mask = []
        mask_pos = None # Position of the mask token
        concat_sent=""
        for segment_id, ent in enumerate(examples):
            #tokens for each example
            new_tokens=[]
            if segment_id==0:
                #implementation for the querying example
                new_tokens.append(self.special_token_mapping['<s>'])
                length=first_sent_limit
                temp=prompt_arch+'<mask>'+' . </s>'
            else:
                length=other_sent_limit
                if self.fine_grind:
                    if ent['label']==0:
                        label_word=self.label_mapping_word[0]
                    else:
                        attack_types=[i for i, x in enumerate(ent['attack']) if x==1]
                        #only for meme
                        if len(attack_types)==0:
                            attack_idx=random.randint(1,5)
                        #randomly pick one
                        #already padding nobody to the head of the list
                        else:
                            order=np.random.permutation(len(attack_types))
                            attack_idx=attack_types[order[0]]
                        label_word=self.label_mapping_word[attack_idx]
                else:
                    label_word=self.label_mapping_word[ent['label']]
                temp=prompt_arch+label_word+' . </s>'
            new_tokens+=self.enc(' '+ent['cap'])
            #truncate the sentence if too long
            new_tokens=new_tokens[:length]
            new_tokens+=self.enc(temp)
            whole_sent=' '+ent['cap']+temp
            concat_sent+=whole_sent

            #update the prompts
            input_ids+=new_tokens
            attention_mask += [1 for i in range(len(new_tokens))]
        """
        if self.opt.DEBUG and self.opt.DEM_SAMP==False:
            print (concat_sent)
        """
        while len(input_ids) < self.total_length:
            input_ids.append(self.special_token_mapping['<pad>'])
            attention_mask.append(0)
        if len(input_ids) > self.total_length:
            input_ids = input_ids[:self.total_length]
            attention_mask = attention_mask[:self.total_length]
        mask_pos = [input_ids.index(self.special_token_mapping['<mask>'])]

        # Make sure that the masked position is inside the max_length
        assert mask_pos[0] < self.total_length
        result = {'input_ids': input_ids,
                  'sent':'<s>'+concat_sent,
                  'attention_mask': attention_mask,
                  'mask_pos': mask_pos}
        return result


    def __getitem__(self,index):
        #query item
        entry=self.entries[index]
        #bootstrap_idx --> sample_idx
        query_idx, context_indices, bootstrap_idx = self.example_idx[index]
        #one example from each class
        supports = self.select_context(
            [self.support_examples[i] for i in context_indices])
        exps=[]
        exps.append(entry)
        exps.extend(supports)
        prompt_features = self.process_prompt(
            exps,
            self.length,
            self.length
        )

        vid=entry['img']
        #label=torch.tensor(self.label_mapping_id[entry['label']])
        label=torch.tensor(entry['label'])
        target=torch.from_numpy(np.zeros((self.num_ans),dtype=np.float32))
        target[label]=1.0

        cap_tokens=torch.Tensor(prompt_features['input_ids'])
        mask_pos=torch.LongTensor(prompt_features['mask_pos'])
        mask=torch.Tensor(prompt_features['attention_mask'])
        batch={
            'sent':prompt_features['sent'],
            'mask':mask,
            'img':vid,
            'target':target,
            'cap_tokens':cap_tokens,
            'mask_pos':mask_pos,
            'label':label
        }
        if self.fine_grind:
            batch['attack']=torch.Tensor(entry['attack'])
        #print (batch)
        return batch

    def __len__(self):
        return len(self.entries)



In [21]:
import os
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score
from transformers import get_linear_schedule_with_warmup,AdamW

In [22]:
def eval_multi_model(opt, model, tokenizer):
    num_queries = opt.NUM_QUERIES
    labels_record = {}
    logits_record = {}
    prob_record = {}
    correct_predictions = 0
    total_samples = 0

    for k in range(num_queries):
        test_set = Multimodal_Data(opt, tokenizer, opt.DATASET, 'test')
        test_loader = DataLoader(test_set,
                                 opt.BATCH_SIZE,
                                 shuffle=False,
                                 num_workers=1)
        len_data = len(test_loader.dataset)
        print('Length of test set:', len_data, 'Query:', k)

        for i, batch in enumerate(test_loader):
            with torch.no_grad():
                cap = batch['cap_tokens'].long().cuda()
                label = batch['label'].float().cuda().view(-1, 1)
                mask = batch['mask'].cuda()
                mask_pos = batch['mask_pos'].cuda()
                logits = model(cap, mask, mask_pos)

                if opt.FINE_GRIND:
                    logits[:, 1] = torch.sum(logits[:, 1:], dim=1)
                    logits = logits[:, :2]

                target = batch['target'].cuda()
                img = batch['img']
                norm_prob = F.softmax(logits, dim=-1)
                norm_logits = norm_prob[:, 1].unsqueeze(-1)

                bz = cap.shape[0]
                for j in range(bz):
                    cur_img = img[j]
                    cur_logits = norm_logits[j:j + 1]
                    cur_prob = norm_prob[j:j + 1]
                    if k == 0:
                        cur_label = label[j:j + 1]
                        labels_record[cur_img] = cur_label
                        logits_record[cur_img] = cur_logits
                        prob_record[cur_img] = cur_prob
                    else:
                        logits_record[cur_img] += cur_logits
                        prob_record[cur_img] += cur_prob

                    # Calculate accuracy
                    pred_label = torch.argmax(norm_prob[j]).item()
                    true_label = label[j].item()
                    if pred_label == true_label:
                        correct_predictions += 1
                    total_samples += 1

    labels = []
    logits = []
    probs = []
    for name in labels_record.keys():
        labels.append(labels_record[name])
        logits.append(logits_record[name] / num_queries)
        probs.append(prob_record[name] / num_queries)

    logits = torch.cat(logits, dim=0)
    labels = torch.cat(labels, dim=0)
    probs = torch.cat(probs, dim=0)

    scores = compute_scaler_score(probs, labels)
    auc = compute_auc_score(logits, labels)

    accuracy = (correct_predictions / total_samples) * 100.0
    print("accuracy is",accuracy)
    return scores * 100.0 / len_data, auc * 100.0 / len_data

In [23]:
from dataclasses import dataclass

@dataclass
class ModelConfig:
    DATASET: str
    FEW_SHOT: bool
    FINE_GRIND: bool
    NUM_SHOTS: int
    MODEL: str
    UNIMODAL: bool
    DATA: str
    CAPTION_PATH: str
    RESULT: str
    FEAT_DIM: int
    CLIP_DIM: int
    BERT_DIM: int
    ROBERTA_DIM: int
    NUM_FOLD: int
    EMB_DIM: int
    NUM_LABELS: int
    POS_WORD: str
    NEG_WORD: str
    DEM_SAMP: bool
    SIM_RATE: float
    IMG_RATE: float
    TEXT_RATE: float
    CLIP_CLEAN: bool
    MULTI_QUERY: bool
    NUM_QUERIES: int
    EMB_DROPOUT: float
    FC_DROPOUT: float
    WEIGHT_DECAY: float
    LR_RATE: float
    EPS: float
    BATCH_SIZE: int
    FIX_LAYERS: int
    MID_DIM: int
    NUM_HIDDEN: int
    LENGTH: int
    TOTAL_LENGTH: int
    PREFIX_LENGTH: int
    NUM_SAMPLE: int
    NUM_LAYER: int
    MODEL_NAME: str
    PRETRAIN_DATA: str
    IMG_VERSION: str
    MAPPING_TYPE: str
    ADD_ENT: bool
    ADD_DEM: bool
    DEBUG: bool
    SAVE: bool
    SAVE_NUM: int
    EPOCHS: int
    SEED: int
    CUDA_DEVICE: int
    WARM_UP: int
    TRANS_LAYER: int
    NUM_HEAD: int
    IMAGE_FOLDER:str
    BASE_MODEL:str




In [24]:
import torch
import torch.nn as nn
from transformers import RobertaForMaskedLM
from transformers import DistilBertForMaskedLM
class RobertaPromptModel(nn.Module):
    def __init__(self,label_list,base_model):
        super(RobertaPromptModel, self).__init__()
        self.label_word_list=label_list
        self.roberta = RobertaForMaskedLM.from_pretrained(base_model)
#         self.roberta = DistilBertForMaskedLM.from_pretrained('distilbert-base-uncased')
    def forward(self,tokens,attention_mask,mask_pos,feat=None):
        batch_size = tokens.size(0)
        #the position of word for prediction
        if mask_pos is not None:
            mask_pos = mask_pos.squeeze()

        out = self.roberta(tokens,
                           attention_mask)
        prediction_mask_scores = out.logits[torch.arange(batch_size),
                                          mask_pos]

        logits = []
        for label_id in range(len(self.label_word_list)):
            logits.append(prediction_mask_scores[:,
                                                 self.label_word_list[label_id]
                                                ].unsqueeze(-1))
            #print(prediction_mask_scores[:, self.label_word_list[label_id]].shape)
        logits = torch.cat(logits, -1)
        #print(logits.shape)
        return logits


def build_baseline(opt,label_list):
    print (label_list)
    return RobertaPromptModel(label_list,opt.BASE_MODEL)


In [25]:
opt = ModelConfig(
    DATASET="mem", FEW_SHOT=False, FINE_GRIND=False, NUM_SHOTS=16, MODEL="pbm", UNIMODAL=False,
    DATA="/kaggle/input", CAPTION_PATH="/kaggle/input", RESULT="/kaggle/working", FEAT_DIM=2048, CLIP_DIM=512,
    BERT_DIM=768, ROBERTA_DIM=1024, NUM_FOLD=5, EMB_DIM=300, NUM_LABELS=2, POS_WORD="good",
    NEG_WORD="bad", DEM_SAMP=False, SIM_RATE=0.5, IMG_RATE=0.5, TEXT_RATE=0.5, CLIP_CLEAN=False,
    MULTI_QUERY=True, NUM_QUERIES=4, EMB_DROPOUT=0.0, FC_DROPOUT=0.4, WEIGHT_DECAY=0.01, LR_RATE=1.3e-5,
    EPS=1e-8, BATCH_SIZE=16, FIX_LAYERS=2, MID_DIM=512, NUM_HIDDEN=512, LENGTH=64, TOTAL_LENGTH=256,
    PREFIX_LENGTH=10, NUM_SAMPLE=1, NUM_LAYER=8, MODEL_NAME="roberta-large", PRETRAIN_DATA="conceptual",
    IMG_VERSION="clean", MAPPING_TYPE="transformer", ADD_ENT=True, ADD_DEM=True, DEBUG=False, SAVE=False,
    SAVE_NUM=100, EPOCHS=1, SEED=1111, CUDA_DEVICE=15, WARM_UP=2000, TRANS_LAYER=1, NUM_HEAD=8,IMAGE_FOLDER='/content/drive/MyDrive/meme/data',
    BASE_MODEL='roberta-base'
)

In [26]:
import torch
import torch.nn as nn
from transformers import RobertaForMaskedLM
from transformers import RobertaTokenizer
# Example label list (replace this with your actual label list)
tokenizer = RobertaTokenizer.from_pretrained(opt.BASE_MODEL)
train_set=Multimodal_Data(opt,tokenizer,opt.DATASET,'train',opt.SEED-1111)
label_list = [train_set.label_mapping_id[i] for i in train_set.label_mapping_word.keys()]

# Build the model
model = build_baseline(opt, label_list)
compile
# Load the state dictionary
model_path = '/kaggle/input/rpm-model/model.pth'
state_dict = torch.load(model_path)

# Load the state dictionary into the model
model.load_state_dict(state_dict)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)



Adding exntity information? True
Adding demographic information? True
Using target information? False
Mapping for label 0, word good, index 205
Mapping for label 1, word bad, index 1099
Template: *<s>**sent_0*.*_It_was*label_**</s>*
Template list: ['', '<s>', '', 'sent_0', '.', '_It_was', 'label_', '', '</s>', '']
Length of supporting example: 8500


100%|██████████| 1/1 [00:05<00:00,  5.66s/it]


The length of the dataset for: train is: 8500
[205, 1099]


  state_dict = torch.load(model_path)


RobertaPromptModel(
  (roberta): RobertaForMaskedLM(
    (roberta): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50265, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(

In [27]:
# Assuming `opt` is already defined and `eval_multi_model` is your evaluation function
print(eval_multi_model(opt, model, tokenizer))

Adding exntity information? True
Adding demographic information? True
Using target information? False
Mapping for label 0, word good, index 205
Mapping for label 1, word bad, index 1099
Template: *<s>**sent_0*.*_It_was*label_**</s>*
Template list: ['', '<s>', '', 'sent_0', '.', '_It_was', 'label_', '', '</s>', '']
Length of supporting example: 8500


100%|██████████| 1/1 [00:00<00:00,  3.23it/s]

The length of the dataset for: test is: 500
Length of test set: 500 Query: 0





Adding exntity information? True
Adding demographic information? True
Using target information? False
Mapping for label 0, word good, index 205
Mapping for label 1, word bad, index 1099
Template: *<s>**sent_0*.*_It_was*label_**</s>*
Template list: ['', '<s>', '', 'sent_0', '.', '_It_was', 'label_', '', '</s>', '']
Length of supporting example: 8500


100%|██████████| 1/1 [00:00<00:00,  3.21it/s]

The length of the dataset for: test is: 500
Length of test set: 500 Query: 1





Adding exntity information? True
Adding demographic information? True
Using target information? False
Mapping for label 0, word good, index 205
Mapping for label 1, word bad, index 1099
Template: *<s>**sent_0*.*_It_was*label_**</s>*
Template list: ['', '<s>', '', 'sent_0', '.', '_It_was', 'label_', '', '</s>', '']
Length of supporting example: 8500


100%|██████████| 1/1 [00:00<00:00,  2.95it/s]

The length of the dataset for: test is: 500
Length of test set: 500 Query: 2





Adding exntity information? True
Adding demographic information? True
Using target information? False
Mapping for label 0, word good, index 205
Mapping for label 1, word bad, index 1099
Template: *<s>**sent_0*.*_It_was*label_**</s>*
Template list: ['', '<s>', '', 'sent_0', '.', '_It_was', 'label_', '', '</s>', '']
Length of supporting example: 8500


100%|██████████| 1/1 [00:00<00:00,  3.28it/s]

The length of the dataset for: test is: 500
Length of test set: 500 Query: 3





accuracy is 57.9
(tensor(60.4000, device='cuda:0'), 66.74401113760382)
