In [1]:
# increase the cell width 
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; } </style>"))   

# need to run this every time start this notebook, to add python3.7/site-packages to sys.pat, in order to import ipywidgets, which is used when RobertaTokenizer.from_pretrained('roberta-base') 
import sys
# sys.path.insert(0, '/xdisk/msurdeanu/fanluo/miniconda3/envs/hotpotqa/lib/python3.7/site-packages') 

### convert hotpotqa to squard format

According to Longformer: use the following input format with special tokens:  “[CLS] [q] question [/q] [p] sent1,1 [s] sent1,2 [s] ... [p] sent2,1 [s] sent2,2 [s] ...” 
where [s] and [p] are special tokens representing sentences and paragraphs. The special tokens were added to the RoBERTa vocabulary and randomly initialized before task finetuning.

In [2]:
# helper functions to convert hotpotqa to squard format modified from  https://github.com/chiayewken/bert-qa/blob/master/run_hotpot.py

import tqdm 
from datetime import datetime 
import pytz 
timeZ_Az = pytz.timezone('US/Mountain') 
#!pip install -U transformers
#!pip install torch==1.6.0 torchvision==0.7.0
import transformers 

QUESTION_START = '[question]'
QUESTION_END = '[/question]' 
TITLE_START = '<t>'  # indicating the start of the title of a paragraph (also used for loss over paragraphs)
TITLE_END = '</t>'   # indicating the end of the title of a paragraph
SENT_MARKER_END = '[/sent]'  # indicating the end of the title of a sentence (used for loss over sentences)
PAR = '[/par]'  # used for indicating end of the regular context and beginning of `yes/no/null` answers
EXTRA_ANSWERS = " yes no null"

 
def create_example_dict(context, answer, id, question, is_sup_fact, is_supporting_para):
    return {
        "context": context,
        "qas": [                        # each context corresponds to only one qa in hotpotqa
            {
                "answer": answer,
                "id": id,
                "question": question,
                "is_sup_fact": is_sup_fact,
                "is_supporting_para": is_supporting_para
            }
        ],
    }

def create_para_dict(example_dicts):
    if type(example_dicts) == dict:
        example_dicts = [example_dicts]   # each paragraph corresponds to only one [context, qas] in hotpotqa
    return {"paragraphs": example_dicts}   


In [3]:
# !python -m pip install tqdm 
# !python -m pip install git+https://github.com/allenai/longformer.git 
# !python -m pip install pytorch-lightning==0.6.0
# !python -m pip install jdc  
# !wget https://ai2-s2-research.s3-us-west-2.amazonaws.com/longformer/longformer-base-4096.tar.gz
# !tar -xf longformer-base-4096.tar.gz

In [4]:
import re
import string

def convert_hotpot_to_squad_format(json_dict, gold_paras_only=False):
    
    """function to convert hotpotqa to squard format.


    Note: A context corresponds to several qas in SQuard. In hotpotqa, one question corresponds to several paragraphs as context. 
          "paragraphs" means different: each paragraph in SQuard contains a context and a list of qas; while 10 paragraphs in hotpotqa concatenated into a context for one question.

    Args:
        json_dict: The original data load from hotpotqa file.
        gold_paras_only: when is true, only use the 2 paragraphs that contain the gold supporting facts; if false, use all the 10 paragraphs
 

    Returns:
        new_dict: The converted dict of hotpotqa dataset, use it as a dict would load from SQuAD json file
                  usage: input_data = new_dict["data"]   https://github.com/google-research/bert/blob/eedf5716ce1268e56f0a50264a88cafad334ac61/run_squad.py#L230

    """
 
    new_dict = {"data": []} 
    for example in json_dict: 

        support_para = set(
            para_title for para_title, _ in example["supporting_facts"]
        )
        sp_set = set(list(map(tuple, example['supporting_facts'])))
        
        raw_contexts = example["context"]
        if gold_paras_only: 
            raw_contexts = [lst for lst in raw_contexts if lst[0] in support_para]
            
        is_supporting_para = []  # a boolean list with 10 True/False elements, one for each paragraph
        is_sup_fact = []         # a boolean list with True/False elements, one for each context sentence
        for para_title, para_lines in raw_contexts:
            is_supporting_para.append(para_title in support_para)   
            for sent_id, sent in enumerate(para_lines):
                is_sup_fact.append( (para_title, sent_id) in sp_set )    
        
        contexts = []   
        for para_id, para in enumerate(raw_contexts):   
            title = _normalize_text(para[0])    
            sents = [_normalize_text(sent) for sent in para[1]] 
            
            if("kept_para_sent" in example):    # reduceded context 
                sent_joint = '' 
                for sent_id, sent in enumerate(sents):  
                    if(sent_id > 0 and example["kept_para_sent"][para_id][sent_id] - example["kept_para_sent"][para_id][sent_id-1] > 1):    
                        sent_joint += (' </s> ' + sent + ' ' + SENT_MARKER_END )   # </s> indicates at least one sentence is removed    
                    else:   
                        sent_joint += (sent + ' ' + SENT_MARKER_END )   
            else:   
                sent_joint =  (' ' + SENT_MARKER_END +' ').join(sents) + ' ' + SENT_MARKER_END      
                
            contexts.append(TITLE_START + ' ' + title + ' ' + TITLE_END + ' ' +  sent_joint)    
        # extra space is fine, which would be ignored latter. most sentences has already have heading space, there are several no heading space; call the _normalize_text() which is same as the one used during evaluation
        
#         context = " </s> ".join(contexts)
#         print(context)
        
#         exit(0)

        
        answer = _normalize_text(example["answer"]) 
#         print("answer: ", answer)
        if(len(answer) > 0):   # answer can be '' after normalize
            new_dict["data"].append(
                create_para_dict(
                    create_example_dict(
                        context=contexts,
                        answer=answer,
                        id = example["_id"],
                        question=_normalize_text(example["question"]),
                        is_sup_fact = is_sup_fact,
                        is_supporting_para = is_supporting_para 
                    )
                )
            ) 

    return new_dict

def _normalize_text(s):

    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))

In [5]:
# debug: check whether convert_hotpot_to_squad_format() works
import os
os.chdir('/xdisk/msurdeanu/fanluo/hotpotQA/Data')
# !cat /xdisk/msurdeanu/fanluo/hotpotQA/Data/hotpot_train_v1.1.json | ../../helper/jq-linux64 -c '.[1:30]' > small.json
#!cat /xdisk/msurdeanu/fanluo/hotpotQA/Data/hotpot_train_v1.1.json | ../../helper/jq-linux64 -c '.[37:50]' > small_dev.json
# !cat /xdisk/msurdeanu/fanluo/hotpotQA/Data/hotpot_train_v1.1.json | ../../helper/jq-linux64 -c '.[31:50]' > sample.json
# !cat /xdisk/msurdeanu/fanluo/hotpotQA/Data/reduced_questions/hotpot_reduced_context_04-08-2021-01:12:53/hotpot_dev_reduced_context_coref_fuzzy.json | ../../helper/jq-linux64 -c '.[6666:7000]' > small_dev.json
# !cat /xdisk/msurdeanu/fanluo/hotpotQA/Data/reduced_questions/hotpot_reduced_context_04-08-2021-01:12:53/hotpot_dev_reduced_context_coref_fuzzy.json | ../../helper/jq-linux64 -c '.[1515:1525]' > small_dev3.json
# !cat /xdisk/msurdeanu/fanluo/hotpotQA/Data/reduced_questions/hotpot_reduced_context_04-08-2021-01:12:53/hotpot_dev_reduced_context_coref_fuzzy.json | ../../helper/jq-linux64 -c '.[6800:7405]' > small_end.json    
    
import json
with open("small.json", "r", encoding='utf-8') as f:  
    json_dict = convert_hotpot_to_squad_format(json.load(f))['data']
    print(json.dumps(json_dict[3], indent=2))

{
  "paragraphs": [
    {
      "context": [
        "<t> cadmium chloride </t> cadmium chloride is white crystalline compound of cadmium and chlorine with formula cdcl [/sent] it is hygroscopic solid that is highly soluble in water and slightly soluble in alcohol [/sent] although it is considered to be ionic it has considerable covalent character to its bonding [/sent] crystal structure of cadmium chloride described below composed of twodimensional layers of ions is reference for describing other crystal structures [/sent] also known are cdcl\u2022ho and cdcl\u20225ho [/sent]",
        "<t> water blue </t> water blue also known as aniline blue acid blue 22 soluble blue 3m marine blue v or ci 42755 is chemical compound used as stain in histology [/sent] water blue stains collagen blue in tissue sections [/sent] it is soluble in water and slightly soluble in ethanol [/sent]",
        "<t> diflucortolone valerate </t> diflucortolone valerate also nerisone creamoily creamointment neriderm

### longfomer's fine-tuning


- For answer span extraction we use BERT’s QA model with addition of a question type (yes/no/span) classification head over the first special token ([CLS]).

- For evidence extraction we apply 2 layer feedforward networks on top of the representations corresponding to sentence and paragraph tokens to get the corresponding evidence prediction scores and use binary cross entropy loss to train the model.

- We combine span, question classification, sentence, and paragraphs losses and train the model in a multitask way using linear combination of losses.


In [6]:
### Section2: This is modified from longfomer's fine-tuning with triviaqa.py from https://github.com/allenai/longformer/blob/master/scripts/triviaqa.py

# !pip uninstall longformer -y
# !python -m pip uninstall longformer -y
# !pip install git+https://github.com/allenai/longformer.git 
# !python -m pip uninstall pytorch-lightning -y
# !pip uninstall pytorch-lightning -y
# !python -m pip install git+http://github.com/ibeltagy/pytorch-lightning.git@v0.8.5_fixes#egg=pytorch-lightning
#!pip install torch==1.6.0 torchvision==0.7.0
 


####requirements.txt:torch>=1.2.0, transformers>=3.0.2, tensorboardX, pytorch-lightning==0.6.0, test-tube==0.7.5
# !conda install transformers --yes
# !conda install cudatoolkit=10.0 --yes
# !python -m pip install git+https://github.com/allenai/longformer.git
# !conda install -c conda-forge regex --force-reinstall --yes
# !conda install pytorch-lightning -c conda-forge
#!python -m pip install jdc 
# !pip install test-tube 
#!python -m pip install ipywidgets 
# !conda update --force conda --yes  
# !jupyter nbextension enable --py widgetsnbextension 
# !conda install jupyter --yes


import os
import json
import string
import random
import numpy as np
import torch
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import DataLoader, Dataset 

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.overrides.data_parallel import LightningDistributedDataParallel
from pytorch_lightning.logging import TestTubeLogger    # sometimes pytorch_lightning.loggers works instead

from longformer.longformer import Longformer, LongformerConfig
from longformer.sliding_chunks import pad_to_window_size
from transformers import RobertaTokenizer
import jdc
from more_itertools import locate
from collections import Counter
from collections import defaultdict

In [7]:
print(pl.__file__)

/home/u32/fanluo/.local/lib/python3.6/site-packages/pytorch_lightning/__init__.py


#### class hotpotqaDataset

##### \_\_init\_\_, \_\_getitem\_\_ and \_\_len\_\_ 

In [8]:
class hotpotqaDataset(Dataset):
    """
    Largely based on
    https://github.com/allenai/allennlp/blob/master/allennlp/data/dataset_readers/reading_comprehension/triviaqa.py
    and
    https://github.com/huggingface/transformers/blob/master/examples/run_squad.py
    """
    
    
    def __init__(self, file_path, tokenizer, max_seq_len, max_doc_len, doc_stride,
                 max_num_answers, ignore_seq_with_no_answers, max_question_len):
        assert os.path.isfile(file_path)
        self.file_path = file_path
#         if("reduced_context" not in self.file_path):
        with open(self.file_path, "r", encoding='utf-8') as f:
            print(f'reading file: {self.file_path}')
            self.data_json = convert_hotpot_to_squad_format(json.load(f))['data']
                
#         else:
#             with open(self.file_path, "r", encoding='utf-8') as f:
#                 print(f'reading file: {self.file_path}')
#                 self.data_json = json.load(f)['data']            
#                 print(self.data_json[0])
            
        self.tokenizer = tokenizer
        self.max_seq_len = max_seq_len
        self.max_doc_len = max_doc_len
        self.doc_stride = doc_stride
        self.max_num_answers = max_num_answers
        self.ignore_seq_with_no_answers = ignore_seq_with_no_answers
        self.max_question_len = max_question_len


#         print(tokenizer.all_special_tokens) 
    
        # A mapping from qid to an int, which can be synched across gpus using `torch.distributed`
        if 'train' not in self.file_path:  # only for the evaluation set 
            self.val_qid_string_to_int_map =                  {
                    entry["paragraphs"][0]['qas'][0]['id']: index
                    for index, entry in enumerate(self.data_json)
                }
        else:
            self.val_qid_string_to_int_map = None
            
            
    def __len__(self):
        return len(self.data_json)

    def __getitem__(self, idx):
        entry = self.data_json[idx]
        tensors_list = self.one_example_to_tensors(entry, idx)
        if(len(tensors_list) != 1):
            print("tensors_list: ", tensors_list)
        assert len(tensors_list) == 1
        return tensors_list[0]

##### one_example_to_tensors

In [9]:
    %%add_to hotpotqaDataset
    def one_example_to_tensors(self, example, idx):
        def is_whitespace(c):
            if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
                return True
            return False
        
        def map_answer_positions(char_to_word_offset, orig_to_tok_index, answer_start, answer_end, slice_start, slice_end, doc_offset):
            # char offset to word offset
            if(answer_start >= len(char_to_word_offset)):
                print("answer_start: ", answer_start)
                print("len(char_to_word_offset): ", len(char_to_word_offset))
            # char offset to word offset
            start_word_position = char_to_word_offset[answer_start]
            end_word_position = char_to_word_offset[answer_end-1] 

#             print("start_word_position: ", start_word_position)
#             print("end_word_position: ", end_word_position)
            # sub_tokens postion reletive to context
            tok_start_position_in_doc = orig_to_tok_index[start_word_position]  
            not_end_of_doc = int(end_word_position + 1 < len(orig_to_tok_index))
            tok_end_position_in_doc = orig_to_tok_index[end_word_position + not_end_of_doc] - not_end_of_doc
            
            if tok_start_position_in_doc < slice_start or tok_end_position_in_doc > slice_end:
                return (-1, -1) # this answer is outside the current slice                     
            
            # sub_tokens postion reletive to begining of all the tokens, including query sub tokens  
            start_position = tok_start_position_in_doc + doc_offset  
            end_position = tok_end_position_in_doc + doc_offset
            
            return (start_position, end_position)
        
#         print("idx: ", idx)
#         print("len(example): ", "len(example)")
        if(len(example["paragraphs"])==0):
            print("idx: ", idx, "'s len(example[‘paragraphs’])==0")

        tensors_list = []
        for paragraph in example["paragraphs"]:  # example["paragraphs"] only contains one paragraph in hotpotqa
            # print("for paragraph in example['paragraphs']: ") 
            context = self.tokenizer.sep_token + ' ' + (' ' + self.tokenizer.sep_token + ' ').join(paragraph["context"] )   
            doc_tokens = []
            char_to_word_offset = []
            prev_is_whitespace = True
            for c in context:
                if is_whitespace(c):
                    prev_is_whitespace = True
                else:
                    if prev_is_whitespace:
                        doc_tokens.append(c) # add a new token
                    else:
                        doc_tokens[-1] += c  # append the character to the last token
                    prev_is_whitespace = False
                char_to_word_offset.append(len(doc_tokens) - 1)
            
#             print("len(char_to_word_offset): ", len(char_to_word_offset))
#             print("char_to_word_offset: ", char_to_word_offset)
            for qa in paragraph["qas"]:
                question_text = qa["question"]
                # print("question text: ", question_text)  
                sp_sent = qa["is_sup_fact"]
                sp_para = qa["is_supporting_para"]
                start_position = None
                end_position = None
                orig_answer_text = None 

#                     print("len(sp_sent):", len(sp_sent))
#                     print("sp_sent", sp_sent) 
#                     print("doc_tokens", doc_tokens)
 
                # keep all answers in the document, not just the first matched answer. It also added the list of textual answers to make evaluation easy.
                
                   
                # ===== Given an example, convert it into tensors  =============
                 
                query_tokens = self.tokenizer.tokenize(question_text)
                query_tokens = query_tokens[:self.max_question_len]
                tok_to_orig_index = []
                orig_to_tok_index = []
                all_doc_tokens = []
                
                # each original token in the context is tokenized to multiple sub_tokens
                for (i, token) in enumerate(doc_tokens):
                    orig_to_tok_index.append(len(all_doc_tokens))
                    # hack: the line below should have been `self.tokenizer.tokenize(token')`
                    # but roberta tokenizer uses a different subword if the token is the beginning of the string
                    # or in the middle. So for all tokens other than the first, simulate that it is not the first
                    # token by prepending a period before tokenizing, then dropping the period afterwards
                    sub_tokens = self.tokenizer.tokenize(f'. {token}')[1:] if i > 0 else self.tokenizer.tokenize(token)
                    for sub_token in sub_tokens:
                        tok_to_orig_index.append(i)
                        all_doc_tokens.append(sub_token)
                
                # all sub tokens, truncate up to limit
                all_doc_tokens = all_doc_tokens[:self.max_doc_len-7] 

                # The -7 accounts for CLS, QUESTION_START, QUESTION_END， [/par]， yes， no， </s>   
                max_tokens_per_doc_slice = self.max_seq_len - len(query_tokens) - 7
                if(max_tokens_per_doc_slice <= 0):
                    print("(max_tokens_per_doc_slice <= 0)")
                assert max_tokens_per_doc_slice > 0
                if self.doc_stride < 0:                           # default
                    # negative doc_stride indicates no sliding window, but using first slice
                    self.doc_stride = -100 * len(all_doc_tokens)  # large -negtive value for the next loop to execute once
                
                # inputs to the model
                input_ids_list = []
                input_mask_list = []
                segment_ids_list = []
                start_positions_list = []
                end_positions_list = []
                q_type_list = []
                sp_sent_list =  [1 if ss else 0 for ss in sp_sent]
                sp_para_list = [1 if sp else 0 for sp in sp_para]
                
                if(len(all_doc_tokens) == 0):
                    print("idx: ", idx, " len(all_doc_tokens) == 0")
#               
                
                for slice_start in range(0, len(all_doc_tokens), max_tokens_per_doc_slice - self.doc_stride):    # execute once by default
                
                    # print("slice_start in range") 
                    slice_end = min(slice_start + max_tokens_per_doc_slice, len(all_doc_tokens))

                    doc_slice_tokens = all_doc_tokens[slice_start:slice_end]
                    tokens = [self.tokenizer.cls_token] + [QUESTION_START] + query_tokens + [QUESTION_END] + doc_slice_tokens + [PAR] + self.tokenizer.tokenize("yes") + self.tokenizer.tokenize("no") + [self.tokenizer.eos_token]   
                    segment_ids = [0] * (len(query_tokens) + 3) + [1] * (len(doc_slice_tokens) + 4) 
#                     if(len(segment_ids) != len(tokens)):
#                         print("len(segment_ids): ", len(segment_ids))
#                         print("len(tokens): ", len(tokens))
                    assert len(segment_ids) == len(tokens)

                    input_ids = self.tokenizer.convert_tokens_to_ids(tokens)   
                    input_mask = [1] * len(input_ids)

                    doc_offset = len(query_tokens) + 3 - slice_start  # where context starts
                    
                    # ===== answer positions tensors  ============
                    start_positions = []
                    end_positions = []
 
                    answer = qa["answer"] 
                    # print("idx: ", idx, " qa['id']: ", qa['id'], " answer: ", answer)
                    if answer == '':
                        q_type = -1
                        start_positions.append(-1)   
                        end_positions.append(-1)           
                    
                    elif answer == 'yes':
                        q_type = 1
                        start_positions.append(len(tokens)-3)   
                        end_positions.append(len(tokens)-3) 
                    elif answer == 'no':
                        q_type = 2
                        start_positions.append(len(tokens)-2)   
                        end_positions.append(len(tokens)-2)  
                    else:
                        # keep all the occurences of answer in the context 
#                         for m in re.finditer("\s?".join(answer.split()), context):   # "\s?".join(answer.split()) in order to match even with extra space in answer or context
                        for m in re.finditer(_normalize_text(answer), context, re.IGNORECASE):
                            answer_start, answer_end = m.span() 
                            start_position, end_position = map_answer_positions(char_to_word_offset, orig_to_tok_index, answer_start, answer_end, slice_start, slice_end, doc_offset)
                            if(start_position != -1):
                                start_positions.append(start_position)   
                                end_positions.append(end_position)
                            
                        if(len(start_positions) > 0): 
                            q_type = 0
                        else: # answer not found in context
                            q_type = -1
                            start_positions.append(-1)   
                            end_positions.append(-1) 


                    # answers from start_positions and end_positions if > self.max_num_answers
                    start_positions = start_positions[:self.max_num_answers]
                    end_positions = end_positions[:self.max_num_answers]

                    # -1 padding up to self.max_num_answers
                    padding_len = self.max_num_answers - len(start_positions)
                    start_positions.extend([-1] * padding_len)
                    end_positions.extend([-1] * padding_len)

                    # replace duplicate start/end positions with `-1` because duplicates can result into -ve loss values
                    found_start_positions = set()
                    found_end_positions = set()
                    for i, (start_position, end_position) in enumerate(zip(start_positions, end_positions)):
                        
                        if start_position in found_start_positions:
                            start_positions[i] = -1
                        if end_position in found_end_positions:
                            end_positions[i] = -1
                        found_start_positions.add(start_position)
                        found_end_positions.add(end_position)
                    
                                         
                    if self.doc_stride >= 0:  # no need to pad if document is not strided
                        # Zero-pad up to the sequence length.
                        padding_len = self.max_seq_len - len(input_ids)
                        input_ids.extend([self.tokenizer.pad_token_id] * padding_len)
                        input_mask.extend([0] * padding_len)
                        segment_ids.extend([0] * padding_len)
                        
                        print("self.doc_stride >= 0")
                        assert len(input_ids) == self.max_seq_len
                        assert len(input_mask) == self.max_seq_len
                        assert len(segment_ids) == self.max_seq_len  
                        
                    input_ids_list.append(input_ids)
                    input_mask_list.append(input_mask)
                    segment_ids_list.append(segment_ids)
                    start_positions_list.append(start_positions)
                    end_positions_list.append(end_positions)
                    q_type_list.append(q_type)
                    
                tensors_list.append((torch.tensor(input_ids_list), torch.tensor(input_mask_list), torch.tensor(segment_ids_list),
                                     torch.tensor(start_positions_list), torch.tensor(end_positions_list), torch.tensor(q_type_list),
                                     torch.tensor([sp_sent_list]),  torch.tensor([sp_para_list]),
                                     qa['id'], answer))     
        return tensors_list



##### collate_one_doc_and_lists

In [10]:
    %%add_to hotpotqaDataset
    @staticmethod
    def collate_one_doc_and_lists(batch):
        num_metadata_fields = 2  # qid and answer  
        fields = [x for x in zip(*batch)]
        stacked_fields = [torch.stack(field) for field in fields[:-num_metadata_fields]]  # don't stack metadata fields
        stacked_fields.extend(fields[-num_metadata_fields:])  # add them as lists not torch tensors

        # always use batch_size=1 where each batch is one document
        # will use grad_accum to increase effective batch size
        assert len(batch) == 1
        fields_with_batch_size_one = [f[0] for f in stacked_fields]
        return fields_with_batch_size_one


##### class info

In [11]:
dir(hotpotqaDataset)

['__add__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'collate_one_doc_and_lists',
 'one_example_to_tensors']

In [12]:
from inspect import getmembers
getmembers(hotpotqaDataset)

[('__add__', <function torch.utils.data.dataset.Dataset.__add__(self, other)>),
 ('__class__', type),
 ('__delattr__', <slot wrapper '__delattr__' of 'object' objects>),
 ('__dict__',
  mappingproxy({'__module__': '__main__',
                '__doc__': '\n    Largely based on\n    https://github.com/allenai/allennlp/blob/master/allennlp/data/dataset_readers/reading_comprehension/triviaqa.py\n    and\n    https://github.com/huggingface/transformers/blob/master/examples/run_squad.py\n    ',
                '__init__': <function __main__.hotpotqaDataset.__init__(self, file_path, tokenizer, max_seq_len, max_doc_len, doc_stride, max_num_answers, ignore_seq_with_no_answers, max_question_len)>,
                '__len__': <function __main__.hotpotqaDataset.__len__(self)>,
                '__getitem__': <function __main__.hotpotqaDataset.__getitem__(self, idx)>,
                'one_example_to_tensors': <function __main__.one_example_to_tensors(self, example, idx)>,
                'collate_one

In [13]:
from inspect import isfunction
functions_list = [o for o in getmembers(hotpotqaDataset) if isfunction(o[1])]
functions_list

[('__add__', <function torch.utils.data.dataset.Dataset.__add__(self, other)>),
 ('__getitem__', <function __main__.hotpotqaDataset.__getitem__(self, idx)>),
 ('__init__',
  <function __main__.hotpotqaDataset.__init__(self, file_path, tokenizer, max_seq_len, max_doc_len, doc_stride, max_num_answers, ignore_seq_with_no_answers, max_question_len)>),
 ('__len__', <function __main__.hotpotqaDataset.__len__(self)>),
 ('collate_one_doc_and_lists',
  <function __main__.collate_one_doc_and_lists(batch)>),
 ('one_example_to_tensors',
  <function __main__.one_example_to_tensors(self, example, idx)>)]

In [14]:
import inspect
inspect.getmro(hotpotqaDataset)  # a hierarchy of classes 

(__main__.hotpotqaDataset, torch.utils.data.dataset.Dataset, object)

In [15]:
inspect.getfullargspec(hotpotqaDataset.one_example_to_tensors)

FullArgSpec(args=['self', 'example', 'idx'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={})

In [16]:
help(hotpotqaDataset)

Help on class hotpotqaDataset in module __main__:

class hotpotqaDataset(torch.utils.data.dataset.Dataset)
 |  Largely based on
 |  https://github.com/allenai/allennlp/blob/master/allennlp/data/dataset_readers/reading_comprehension/triviaqa.py
 |  and
 |  https://github.com/huggingface/transformers/blob/master/examples/run_squad.py
 |  
 |  Method resolution order:
 |      hotpotqaDataset
 |      torch.utils.data.dataset.Dataset
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __getitem__(self, idx)
 |  
 |  __init__(self, file_path, tokenizer, max_seq_len, max_doc_len, doc_stride, max_num_answers, ignore_seq_with_no_answers, max_question_len)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __len__(self)
 |  
 |  one_example_to_tensors(self, example, idx)
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  collate_one_doc_and_lists(batch)
 |  
 |  -----------------------

#### class hotpotqa

##### \_\_init\_\_,  forward, dataloaders

In [17]:
class hotpotqa(pl.LightningModule):
    def __init__(self, args):
        super(hotpotqa, self).__init__()
        self.args = args
        self.hparams = args
 
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        num_new_tokens = self.tokenizer.add_special_tokens({"additional_special_tokens": [TITLE_START, TITLE_END, SENT_MARKER_END, QUESTION_START , QUESTION_END, PAR]})
#         print(self.tokenizer.all_special_tokens)
        self.tokenizer.model_max_length = self.args.max_seq_len
        self.model = self.load_model()
        self.model.resize_token_embeddings(len(self.tokenizer))
        self.num_labels = 2
        self.qa_outputs = torch.nn.Linear(self.model.config.hidden_size, self.num_labels)
         
        self.linear_type = torch.nn.Linear(self.model.config.hidden_size, 3)   #  question type (yes/no/span/null) classification 

        self.fnn_sp_sent = torch.nn.Sequential(
          torch.nn.Linear(self.model.config.hidden_size, self.model.config.hidden_size), 
          torch.nn.GELU(),
          torch.nn.Linear(self.model.config.hidden_size, 1),      # score for 'yes', while 0 for 'no'
        )
        
        self.fnn_sp_para = torch.nn.Sequential(
          torch.nn.Linear(self.model.config.hidden_size, self.model.config.hidden_size), 
          torch.nn.GELU(),
          torch.nn.Linear(self.model.config.hidden_size, 1),      # score for 'yes', while 0 for 'no'
        )
         
        
        self.train_dataloader_object = self.val_dataloader_object = self.test_dataloader_object = None
        
 
    def load_model(self):
        
        config = LongformerConfig.from_pretrained(self.args.model_path) 
        # choose the attention mode 'n2', 'tvm' or 'sliding_chunks'
        # 'n2': for regular n2 attantion
        # 'tvm': a custom CUDA kernel implementation of our sliding window attention
        # 'sliding_chunks': a PyTorch implementation of our sliding window attention
        config.attention_mode = 'sliding_chunks'
        model = Longformer.from_pretrained(self.args.model_path, config=config)

        print("self.args.model_path: ", self.args.model_path)
        for layer in model.encoder.layer:
            layer.attention.self.attention_mode = self.args.attention_mode
            self.args.attention_window = layer.attention.self.attention_window

        print("Loaded model with config:")
        print(model.config)

        for p in model.parameters():
            p.requires_grad_(True)
        model.train()
        return model

#%%add_to hotpotqa    # does not seems to work for the @pl.data_loader decorator, missing which causes error "validation_step() takes 3 positional arguments but 4 were given"    
###################################################### dataloaders ########################################################### 
    @pl.data_loader
    def train_dataloader(self):
        if self.train_dataloader_object is not None:
            return self.train_dataloader_object
        dataset = hotpotqaDataset(file_path=self.args.train_dataset, tokenizer=self.tokenizer,
                                  max_seq_len=self.args.max_seq_len, max_doc_len=self.args.max_doc_len,
                                  doc_stride=self.args.doc_stride,
                                  max_num_answers=self.args.max_num_answers,
                                  max_question_len=self.args.max_question_len,
                                  ignore_seq_with_no_answers=self.args.ignore_seq_with_no_answers)
        
#         dist_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        dl = DataLoader(dataset, batch_size=1, shuffle=False,   # set shuffle=False, otherwise it will sample a different subset of data every epoch with train_percent_check
                        num_workers=self.args.num_workers,  
                        collate_fn=hotpotqaDataset.collate_one_doc_and_lists)

        self.train_dataloader_object = dl  
        return self.train_dataloader_object
    
 

    @pl.data_loader
    def val_dataloader(self):
        if self.val_dataloader_object is not None:
            return self.val_dataloader_object
        dataset = hotpotqaDataset(file_path=self.args.dev_dataset, tokenizer=self.tokenizer,
                                  max_seq_len=self.args.max_seq_len, max_doc_len=self.args.max_doc_len,
                                  doc_stride=self.args.doc_stride,
                                  max_num_answers=self.args.max_num_answers,
                                  max_question_len=self.args.max_question_len,
                                  ignore_seq_with_no_answers=False)  # evaluation data should keep all examples 

        
        
#         dist_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        dl = DataLoader(dataset, batch_size=1, shuffle=False,
                        num_workers=self.args.num_workers, 
                        collate_fn=hotpotqaDataset.collate_one_doc_and_lists)
        self.val_dataloader_object = dl
        return self.val_dataloader_object

    @pl.data_loader
    def test_dataloader(self):
        if self.test_dataloader_object is not None:
            return self.test_dataloader_object
        dataset = hotpotqaDataset(file_path=self.args.dev_dataset, tokenizer=self.tokenizer,
                                  max_seq_len=self.args.max_seq_len, max_doc_len=self.args.max_doc_len,
                                  doc_stride=self.args.doc_stride,
                                  max_num_answers=self.args.max_num_answers,
                                  max_question_len=self.args.max_question_len,
                                  ignore_seq_with_no_answers=False)  # evaluation data should keep all examples

#         dist_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        dl = DataLoader(dataset, batch_size=1, shuffle=False,
                        num_workers=self.args.num_workers, 
                        collate_fn=hotpotqaDataset.collate_one_doc_and_lists)
        self.test_dataloader_object = dl
        return self.test_dataloader_object

#%%add_to hotpotqa  
    def forward(self, input_ids, attention_mask, segment_ids, start_positions, end_positions, q_type, sp_sent, sp_para):
 
 
        if 'longformer' in self.args.model_path:
            
            if(input_ids.size(0) != 1):
                print("input_ids.size(0) != 1")
            assert(input_ids.size(0)==1)
            # Each batch is one document, and each row of the batch is a chunck of the document.    ????
            # Make sure all rows have the same question length.
            
#             print("start_positions: ", start_positions)
#             print("end_positions: ", end_positions)
            # local attention everywhere
            attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device)
            
            # global attention for the cls and all question tokens
            # print("question_end_index")
            question_end_index = self._get_special_index(input_ids, [QUESTION_END])
    #         if(question_end_index.size(0) == 1):
    #             attention_mask[:,:question_end_index.item()] = 2  
    #         else:
            attention_mask[:,:question_end_index[0].item()+1] = 2  # from <cls> until </q>
    #             print("more than 1 <q> in: ", self.tokenizer.convert_ids_to_tokens(input_ids[0].tolist()) )
            
            # global attention for the sentence and paragraph special tokens  
            # print("sent_indexes")
            sent_indexes = self._get_special_index(input_ids, [SENT_MARKER_END])
            attention_mask[:, sent_indexes] = 2
            
            # 
            para_indexes = self._get_special_index(input_ids, [TITLE_START])
            attention_mask[:, para_indexes] = 2       
            print("para_indexes: ", para_indexes) 
    
            # sliding_chunks implemenation of selfattention requires that seqlen is multiple of window size
            input_ids, attention_mask = pad_to_window_size(
                input_ids, attention_mask, self.args.attention_window, self.tokenizer.pad_token_id)
    
            sequence_output = self.model(
                    input_ids,
                    attention_mask=attention_mask)[0]
    #         print("size of sequence_output: " + str(sequence_output.size()))
#             print("sequence_output: " + str(sequence_output))
    
            # The pretrained hotpotqa model wasn't trained with padding, so remove padding tokens
            # before computing loss and decoding.
            padding_len = input_ids[0].eq(self.tokenizer.pad_token_id).sum()
            if padding_len > 0:
                sequence_output = sequence_output[:, :-padding_len]
    #         print("size of sequence_output after removing padding: " + str(sequence_output.size()))
        else:
            sequence_output = self.model(input_ids, attention_mask=attention_mask)[0]      
        
        ###################################### layers on top of sequence_output ##################################
        

        ### 1. answer start and end positions classification ###   
        logits = self.qa_outputs(sequence_output) 
        start_logits, end_logits = logits.split(1, dim=-1) 
        start_logits = start_logits.squeeze(-1) 
        end_logits = end_logits.squeeze(-1)
 
        ### 2. type classification, similar as class LongformerClassificationHead(nn.Module) https://huggingface.co/transformers/_modules/transformers/modeling_longformer.html#LongformerForSequenceClassification.forward ### 
        type_logits = self.linear_type(sequence_output[:,0]) 
        
        # ### 3. supporting paragraph classification ###  
        sp_para_output = sequence_output[:,para_indexes,:]  
        print("sp_para_output: ", sp_para_output)
        sp_para_output_t = self.fnn_sp_para(sp_para_output) 
        print("sp_para_output_t: ", sp_para_output_t)
         # linear_sp_sent generates a single score for each sentence, instead of 2 scores for yes and no.   
        # Argument the score with additional score=0. The same way did in the HOTPOTqa paper
        sp_para_output_aux = torch.zeros(sp_para_output_t.shape, dtype=torch.float, device=sp_para_output_t.device) 
        predict_support_para = torch.cat([sp_para_output_aux, sp_para_output_t], dim=-1).contiguous() 
#         print("predict_support_para: ", predict_support_para)
        ### 4. supporting fact classification ###     
        # the first sentence in a paragraph is leading by <p>, other sentences are leading by <s>
 
        sp_sent_output = sequence_output[:,sent_indexes,:]  
        sp_sent_output_t = self.fnn_sp_sent(sp_sent_output)     
        sp_sent_output_aux = torch.zeros(sp_sent_output_t.shape, dtype=torch.float, device=sp_sent_output_t.device) 
        predict_support_sent = torch.cat([sp_sent_output_aux, sp_sent_output_t], dim=-1).contiguous() 
        
#         answer_loss, type_loss = self.loss_computation(start_positions, end_positions, start_logits, end_logits, q_type, type_logits)
        outputs = (start_logits, end_logits, type_logits, sp_para_output_t, sp_sent_output_t)  
        answer_loss, type_loss, sp_para_loss, sp_sent_loss  = self.loss_computation(start_positions, end_positions, start_logits, end_logits, q_type, type_logits, sp_para, predict_support_para, sp_sent, predict_support_sent)
 
        outputs = (answer_loss, type_loss, sp_para_loss, sp_sent_loss,) + outputs    
        return outputs
    
    def loss_computation(self, start_positions, end_positions, start_logits, end_logits, q_type, type_logits, sp_para, predict_support_para, sp_sent, predict_support_sent):
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split add a dimension
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)

            if not self.args.regular_softmax_loss:
                # loss function suggested in section 2.2 here https://arxiv.org/pdf/1710.10723.pdf
                # NOTE: this returns sum of losses, not mean, so loss won't be normalized across different batch sizes
                # but batch size is always 1, so this is not a problem
                start_loss = self.or_softmax_cross_entropy_loss_one_doc(start_logits, start_positions, ignore_index=-1)
                end_loss = self.or_softmax_cross_entropy_loss_one_doc(end_logits, end_positions, ignore_index=-1)
            else: 
                start_positions = start_positions[:, 0:1]   # only use the top1 start_position considering only one appearance of the answer string
                end_positions = end_positions[:, 0:1]
                start_loss = crossentropy(start_logits, start_positions[:, 0])
                end_loss = crossentropy(end_logits, end_positions[:, 0])
                
 
            crossentropy = torch.nn.CrossEntropyLoss(ignore_index=-1)
            type_loss = crossentropy(type_logits, q_type)  
            
            crossentropy_average = torch.nn.CrossEntropyLoss(reduction = 'mean', ignore_index=-1)     
            if(sp_para.view(-1).size(0) > 0):
#                 print("predict_support_para.view(-1, 2): ", predict_support_para.view(-1, 2))
#                 print("sp_para.view(-1): ", sp_para.view(-1))
                sp_para_loss = crossentropy_average(predict_support_para.view(-1, 2), sp_para.view(-1))
#                 print("sp_para_loss: ", sp_para_loss)
            else:
                sp_para_loss = torch.tensor(0).type_as(type_loss)  # when raw_context is []
                
            if(sp_sent.view(-1).size(0) > 0):    
                sp_sent_loss = crossentropy_average(predict_support_sent.view(-1, 2), sp_sent.view(-1))      
            else:
                sp_sent_loss = torch.tensor(0).type_as(type_loss)  # when raw_context is []
 
            answer_loss = (start_loss + end_loss) / 2 
        return answer_loss, type_loss, sp_para_loss, sp_sent_loss  


#     %%add_to hotpotqa    
    def _get_special_index(self, input_ids, special_tokens):
        
        if(input_ids.size(0)!=1):
            print("input_ids.size(0): ", input_ids.size(0))
            print("input_ids: ", input_ids)
        
        assert(input_ids.size(0)==1) 
        mask = input_ids != input_ids # initilaize 
        for special_token in special_tokens:
            mask = torch.logical_or(mask, input_ids.eq(self.tokenizer.convert_tokens_to_ids(special_token))) 
 
        token_indices = torch.nonzero(mask, as_tuple=False)    
         
 
        return token_indices[:,1]    

    def or_softmax_cross_entropy_loss_one_doc(self, logits, target, ignore_index=-1, dim=-1):
        """loss function suggested in section 2.2 here https://arxiv.org/pdf/1710.10723.pdf"""
        assert logits.ndim == 2
        assert target.ndim == 2
        assert logits.size(0) == target.size(0) 
        
        # with regular CrossEntropyLoss, the numerator is only one of the logits specified by the target, considing only one correct target 
        # here, the numerator is the sum of a few potential targets, where some of them is the correct answer, considing more correct targets

        # target are indexes of tokens, padded with ignore_index=-1
        # logits are scores (one for each label) for each token
 
        # compute a target mask
        target_mask = target == ignore_index
        # replaces ignore_index with 0, so `gather` will select logit at index 0 for the masked targets
        masked_target = target * (1 - target_mask.long())                 # replace all -1 in target with 0， tensor([[447,   0,   0,   0, ...]])
    
        # gather logits
        gathered_logits = logits.gather(dim=dim, index=masked_target)     # tensor([[0.4382, 0.2340, 0.2340, 0.2340 ... ]]), padding logits are all replaced by logits[0] 
 
        # Apply the mask to gathered_logits. Use a mask of -inf because exp(-inf) = 0
        gathered_logits[target_mask] = float('-inf')                      # padding logits are all replaced by -inf
 
        # each batch is one example
        gathered_logits = gathered_logits.view(1, -1)
        logits = logits.view(1, -1)
 
        # numerator = log(sum(exp(gathered logits)))
        log_score = torch.logsumexp(gathered_logits, dim=dim, keepdim=False)
 
        log_norm = torch.logsumexp(logits, dim=dim, keepdim=False)
        
        # compute the loss
        loss = -(log_score - log_norm) 
        
        # some of the examples might have a loss of `inf` when `target` is all `ignore_index`: when computing start_loss and end_loss for question with the gold answer of yes/no 
        # when `target` is all `ignore_index`, loss is 0 
        loss = loss[~torch.isinf(loss)].sum()
#         loss = torch.tanh(loss)
#         print("final loss: " + str(loss)) 
        return loss  


In [18]:
# debug
# input_ids = torch.tensor([[-1, 5, -1, 2]])
# input_ids.size(0)
# token_indices =  torch.nonzero(input_ids == torch.tensor(-1))[:,1]
# # token_indices
# # token_indices.item()
# # indices =  torch.LongTensor([[2],[0,2]])

# # torch.gather(input_ids, 1, token_indices.unsqueeze(0))
# # p_index = token_indices.view(input_ids.size(0), -1)[:,1::2]   
# # attention_mask = torch.ones(input_ids.shape, dtype=torch.long) 
# # attention_mask[:,token_indices] = 2
# # attention_mask
# p_index = torch.tensor([1, 3, 4])
# s_index = torch.tensor([1,3,6])
# torch.sort(torch.cat((s_index, p_index)))[0]
# attention_mask.view(-1)[ p_index.view(-1), :].view(attention_mask.size(0), -1)
# # for pi in p_index[0]:
# #     attention_mask[:, pi] = 2
# # attention_mask
# # s_index = torch.tensor([[1,3]])
# # torch.sort(torch.cat((p_index, s_index), -1), -1)

# sequence_output  = torch.tensor([[[-1, 5, -1, 2],
#                                  [-2, 27, 2, 9],
#                                  [3, 6, 1, 65],
#                                  [52, 36, 13, 2],
#                                  [73, 26, 1, 7]
#                                 ]])

# sp_para_output_t   = torch.tensor([[[-1],
#                                  [-2 ],
#                                  [3],
#                                  [52],
#                                  [73]
#                                 ]])
# torch.zeros(sp_para_output_t.shape, dtype=torch.float) 

# print("size of sequence_output: " + str(sequence_output.size()))
# # print("size of p_index.unsqueeze(0).unsqueeze(-1): " + str(p_index.unsqueeze(0).size()))
# sequence_output[:,p_index,:]
# b = torch.tensor([0, 1, 2, 3])
# p_index.unsqueeze(-1) * b

# input_ids = torch.tensor([[0.2, 0.0, 0.6, 0.6], [0.2, 0.6, 0.0, 0.0]]) 
# # input_ids.tolist()
# p_index =  torch.nonzero(input_ids == torch.tensor(0.2))
# print(p_index)
# s_index =  torch.nonzero(input_ids == torch.tensor(0.6))
# print(s_index)

# sp_sent = torch.tensor([[0, 1, 1, 0]])
# torch.nonzero(sp_sent, as_tuple=True)[1]
# cat_index = torch.tensor([])
# cat_index = torch.cat((cat_index, ids[0][1]))
# print(ids)
# print(cat_index)
# p_index[p_index[:,0] == 0]

# cat_index[cat_index[:,0].argsort()]

# sorted(torch.cat((p_index, s_index)), key = lambda x: x[0])
# torch.sort(torch.cat((p_index, s_index)), 0)[0]
# for cor in token_indices:
#     attention_mask[cor[0].item()][cor[1].item()] = 2
# attention_mask 
# input_ids = torch.tensor([[-1, 5, -6, 2]])
# print(input_ids.size())
# input_ids.topk(k=2, dim=-1).indices

# predict_type = torch.tensor([[-0.0925, -0.0999, -0.1671]])
# p_type = torch.argmax(predict_type, dim=1).item()
# p_type_score = torch.max(predict_type, dim=1)[0].item()
# print("predict_type: ", predict_type)
# print("p_type: ", p_type)
# print("p_type_score: ", p_type_score)
    
# a = torch.tensor([[0.9213,  1.0887, -0.8858, -1.7683]])
# a.view(-1).size() 
# print(torch.sigmoid(a))
# a = torch.tensor([ 9.213,  1.0887, -0.8858, 7683])
# print(torch.sigmoid(a))

# a = torch.tensor([[[1],[2],[4],[-1],[-1]]])
# a= a.squeeze(-1)
# a.size() 
# a[:, torch.where(a!=-1)[1]]
# m = torch.nn.Sigmoid()
# print("m: ", m)
# loss = torch.nn.BCELoss()
# # input = torch.randn(3, requires_grad=True)
# # print("input: ", input)
# # target = torch.empty(3).random_(2)
# # print("target: ", target)
# # output = loss(m(input), target)
# # print("output: ", output)

# input = torch.tensor([1.0293, -0.1585,  1.1408], requires_grad=True)
# print("input: ", input)
# print("Sigmoid(input): ", m(input))
# target = torch.tensor([0., 1., 0.])
# print("target: ", target)
# output = loss(m(input), target)
# print("output: ", output)

# input = torch.tensor([[1.0293, -0.1585,  1.1408]], requires_grad=True)
# print("input: ", input)
# target = torch.tensor([[0., 1., 0.]])
# print("target: ", target)
# output = loss(m(input), target)
# print("output: ", output)

# 1.1761 * 3
# soft_input = torch.nn.Softmax(dim=-1)
# log_soft_input = torch.log(soft_input(input))
# loss=torch.nn.NLLLoss() 
# loss(log_soft_input, target)
# input = torch.log(soft_input(input))
# loss=torch.nn.NLLLoss()
# loss(input,target)

# loss =torch.nn.CrossEntropyLoss()
# loss(input,target) 

# sp_sent_logits =torch.tensor([[[0.0988],
#          [0.0319],
#          [0.0314]]])
# sp_sent_logits.squeeze()

# input_ids = torch.tensor([[0.6, 0.0, 0.6, 0.0]]) 
# token_indices =  torch.nonzero(input_ids == torch.tensor(0.6))
# token_indices[:,1][0].item()

# def or_softmax_cross_entropy_loss_one_doc(logits, target, ignore_index=-1, dim=-1):
#     """loss function suggested in section 2.2 here https://arxiv.org/pdf/1710.10723.pdf"""
#     assert logits.ndim == 2
#     assert target.ndim == 2
#     assert logits.size(0) == target.size(0) 

#     # with regular CrossEntropyLoss, the numerator is only one of the logits specified by the target, considing only one correct target 
#     # here, the numerator is the sum of a few potential targets, where some of them is the correct answer, considing more correct targets

#     # target are indexes of tokens, padded with ignore_index=-1
#     # logits are scores (one for each label) for each token
# #         print("or_softmax_cross_entropy_loss_one_doc" ) 
# #         print("size of logits: " + str(logits.size()))                    # torch.Size([1, 746]), 746 is number of all tokens 
# #         print("size of target: " + str(target.size()))                    # torch.Size([1, 64]),  -1 padded
#     print("target: " + str(target)) 

#     # compute a target mask
#     target_mask = target == ignore_index
#     # replaces ignore_index with 0, so `gather` will select logit at index 0 for the masked targets
#     masked_target = target * (1 - target_mask.long())                 # replace all -1 in target with 0， tensor([[447,   0,   0,   0, ...]])
#     print("masked_target: " + str(masked_target))     
#     # gather logits
#     gathered_logits = logits.gather(dim=dim, index=masked_target)     # tensor([[0.4382, 0.2340, 0.2340, 0.2340 ... ]]), padding logits are all replaced by logits[0] 
# #         print("size of gathered_logits: " + str(gathered_logits.size()))  # torch.Size([1, 64])
#     print("gathered_logits: " + str(gathered_logits)) 
#     # Apply the mask to gathered_logits. Use a mask of -inf because exp(-inf) = 0
#     gathered_logits[target_mask] = float('-inf')                      # padding logits are all replaced by -inf
#     print("gathered_logits after -inf: " + str(gathered_logits))      # tensor([[0.4382,   -inf,   -inf,   -inf,   -inf,...]])

#     # each batch is one example
#     gathered_logits = gathered_logits.view(1, -1)
#     logits = logits.view(1, -1)
# #         print("size of gathered_logits after view: " + str(gathered_logits.size()))  # torch.Size([1, 64])
# #         print("size of logits after view: " + str(logits.size()))                    # torch.Size([1, 746])　　

#     # numerator = log(sum(exp(gathered logits)))
#     log_score = torch.logsumexp(gathered_logits, dim=dim, keepdim=False)
#     print("log_score: " + str(log_score)) 
#     # denominator = log(sum(exp(logits)))
#     log_norm = torch.logsumexp(logits, dim=dim, keepdim=False)
#     print("log_norm: " + str(log_norm)) 

#     # compute the loss
#     loss = -(log_score - log_norm)
#     print("loss: " + str(loss))


#     # some of the examples might have a loss of `inf` when `target` is all `ignore_index`: when computing start_loss and end_loss for question with the gold answer of yes/no 
#     # replace -inf with 0
#     loss = loss[~torch.isinf(loss)].sum()
#     print("final loss: " + str(loss)) 
#     return loss 

# # input = torch.tensor([[ 0,  0.0780],
# #         [0, 0.9253 ],
# #         [0, 0.0987]])
# # target = torch.tensor([0,1,0])
# # target.size(0) < 1
# # input = torch.tensor([[ 1.1879,  1.0780,  0.5312],
# #         [-0.3499, -1.9253, -1.5725],
# #         [-0.6578, -0.0987,  1.1570]])
# # target=torch.tensor([0,1,2])
# # predict_support_para.view(-1, 2), sp_para.view(-1)
# # input = torch.tensor([[ 1.1879,  1.0780,  0.5312]])
# # target=torch.tensor([0])
# # or_softmax_cross_entropy_loss_one_doc(input, target.unsqueeze(-1))
# # target=torch.tensor([1])
# # or_softmax_cross_entropy_loss_one_doc(input, target.unsqueeze(-1))
# # target=torch.tensor([2])
# # or_softmax_cross_entropy_loss_one_doc(input, target.unsqueeze(-1))
# # target=torch.tensor([-1])
# # or_softmax_cross_entropy_loss_one_doc(input, target.unsqueeze(-1))
# a = torch.tensor([6.4062])    
# b = torch.tensor([2.23])
# torch.cat((a,b))
 
# for a in list_tensor
# from functools import reduce
# reduce(lambda x,y: torch.cat((x,y)), list_tensor[:-1])

# torch.tanh(a)
# # if(torch.isinf(a)):
# #     print("is inf")
# 5 * 1e-2


# import torch
# special_tokens = [1,2]
# input_ids = torch.tensor([[ 1, 0, 2, 1, 0, 2]])

# mask = input_ids != input_ids # initilaize 
# for special_token in special_tokens:
#     mask = torch.logical_or(mask, input_ids.eq(special_token)) 
#     print("mask: ", mask)
# torch.nonzero(mask)    
 

In [19]:
# # debug: check loaded dataset by DataLoader
# tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
# num_new_tokens = tokenizer.add_special_tokens({"additional_special_tokens": ["<p>", "<q>", "</q>"]})
# # # # print(tokenizer.all_special_tokens)    
# # # # print(tokenizer.all_special_ids)     
# # # # tokenizer.convert_tokens_to_ids("<s>")
# # # # tokenizer.sep_token
# print(tokenizer.tokenize("yes"))
# print(tokenizer.tokenize("no"))
# print(tokenizer.tokenize("null"))
# # # all_doc_tokens = []
# # # orig_to_tok_index = []
# # # tok_to_orig_index = []
# # # for (i, token) in enumerate(["<s>", "da", "tell", "<p>", "say"]):
# # #     orig_to_tok_index.append(len(all_doc_tokens))
# # #     sub_tokens = tokenizer.tokenize(f'. {token}')[1:] if i > 0 else tokenizer.tokenize(token)
# # #     for sub_token in sub_tokens:
# # #         tok_to_orig_index.append(i)
# # #         all_doc_tokens.append(sub_token)
# # # all_doc_tokens


 

# dataset = hotpotqaDataset(file_path= args.train_dataset, tokenizer=tokenizer,
#                           max_seq_len= args.max_seq_len, max_doc_len= args.max_doc_len,
#                           doc_stride= args.doc_stride,
#                           max_num_answers= args.max_num_answers,
#                           max_question_len= args.max_question_len,
#                           ignore_seq_with_no_answers= args.ignore_seq_with_no_answers)
# print(len(dataset))

# # # dl = DataLoader(dataset, batch_size=1, shuffle=None,
# # #                     num_workers=args.num_workers, sampler=None,
# # #                     collate_fn=hotpotqaDataset.collate_one_doc_and_lists)

# example = dataset[3]  
# [input_ids, input_mask, segment_ids, subword_starts, subword_ends, q_type, sp_sent, sp_para, qids] = example
 

# print(input_ids[0][:20].tolist())
# print(input_mask) 
# print(segment_ids) 
# print(subword_starts) 
# print(subword_ends)
# print(q_type)
# print(sp_sent) 
# print(sp_para) 
# print(qids)
# print(tokenizer.convert_ids_to_tokens(input_ids[0][667:669+1].tolist()))
# 0.0033 * 90447 
# 28*4
# torch.tensor(0.0)

##### configure_ddp

In [20]:
 %%add_to hotpotqa
 # A hook to overwrite to define your own DDP(DistributedDataParallel) implementation init. 
 # The only requirement is that: 
 # 1. On a validation batch the call goes to model.validation_step.
 # 2. On a training batch the call goes to model.training_step.
 # 3. On a testing batch, the call goes to model.test_step
 def configure_ddp(self, model, device_ids):
    model = LightningDistributedDataParallel(
        model,
        device_ids=device_ids,
        find_unused_parameters=True
    )
    return model

##### **configure_optimizers**

In [21]:
%%add_to hotpotqa
def configure_optimizers(self):
    # Set up optimizers and (optionally) learning rate schedulers
    def lr_lambda(current_step):
        if current_step < self.args.warmup:
            return float(current_step) / float(max(1, self.args.warmup))
        return max(0.0, float(self.args.steps - current_step) / float(max(1, self.args.steps - self.args.warmup)))

    optimizer = torch.optim.Adam(self.parameters(), lr=self.args.lr)

    scheduler = LambdaLR(optimizer, lr_lambda, last_epoch=-1)
    return [optimizer], [{"scheduler": scheduler, "interval": "step"}]

##### **training_step**

In [22]:
%%add_to hotpotqa
def training_step(self, batch, batch_nb):
    # do the forward pass and calculate the loss for a batch 
#     input_ids, input_mask, segment_ids, subword_starts, subword_ends, q_type, qid, answer = batch 
    input_ids, input_mask, segment_ids, subword_starts, subword_ends, q_type, sp_sent, sp_para, qid, answer = batch
#     print("qid: " + str(qid)) 
    # print("size of input_ids: " + str(input_ids.size())) 
    output = self.forward(input_ids, input_mask, segment_ids, subword_starts, subword_ends, q_type, sp_sent, sp_para)
#     answer_loss, type_loss = output[:2]
    # answer_loss, type_loss = output[:4]
    answer_loss, type_loss, sp_para_loss, sp_sent_loss  = output[:4]
    # print("answer_loss: ", answer_loss)
    # print("type_loss: ", type_loss)
    # print("sp_para_loss: ", sp_para_loss)
    # print("sp_sent_loss: ", sp_sent_loss)

#     loss  = answer_loss +  type_loss + sp_para_loss + sp_sent_loss
    loss = answer_loss + 5*type_loss + 10*sp_para_loss + 10*sp_sent_loss
#     print("weighted loss: ", loss)
#     print("self.trainer.optimizers[0].param_groups[0]['lr']: ", self.trainer.optimizers[0].param_groups[0]['lr'])
    lr = loss.new_zeros(1) + self.trainer.optimizers[0].param_groups[0]['lr']  # loss.new_zeros(1) is tensor([0.]), converting 'lr' to tensor' by adding it.  

    tensorboard_logs = {'loss': loss, 'train_answer_loss': answer_loss, 'train_type_loss': type_loss, 
                         'train_sp_para_loss': sp_para_loss, 'train_sp_sent_loss': sp_sent_loss, 
                        'lr': lr #,
                        # 'mem': torch.tensor(torch.cuda.memory_allocated(input_ids.device) / 1024 ** 3).type_as(loss) 
    }
    return tensorboard_logs

##### training_end

In [23]:
# %%add_to hotpotqa
    # # the function is called for each batch after every epoch is completed
    # def training_end(self, output): 
    #     # print("training_end at epoch: ", self.current_epoch)
    # #     print("len(outputs): ",len(outputs))
    # #     print("output: ",output)
    
    #     # one batch only has one example
    #     avg_loss = output['loss']    
    #     avg_answer_loss = output['train_answer_loss']  
    #     avg_type_loss = output['train_type_loss']    
    #     avg_sp_para_loss = output['train_sp_para_loss']   
    #     avg_sp_sent_loss = output['train_sp_sent_loss'] 
    #     avg_lr = output['lr']      
         
     
    #     if self.trainer.use_ddp:
    #         torch.distributed.all_reduce(avg_loss, op=torch.distributed.ReduceOp.SUM)
    #         avg_loss /= self.trainer.world_size 
    #         torch.distributed.all_reduce(avg_answer_loss, op=torch.distributed.ReduceOp.SUM)
    #         avg_answer_loss /= self.trainer.world_size 
    #         torch.distributed.all_reduce(avg_type_loss, op=torch.distributed.ReduceOp.SUM)
    #         avg_type_loss /= self.trainer.world_size 
    #         torch.distributed.all_reduce(avg_sp_para_loss, op=torch.distributed.ReduceOp.SUM)
    #         avg_sp_para_loss /= self.trainer.world_size 
    #         torch.distributed.all_reduce(avg_sp_sent_loss, op=torch.distributed.ReduceOp.SUM)
    #         avg_sp_sent_loss /= self.trainer.world_size 
    #         torch.distributed.all_reduce(avg_lr, op=torch.distributed.ReduceOp.SUM)
    #         avg_lr /= self.trainer.world_size 
            
     
    #     tensorboard_logs = { #'avg_train_loss': avg_loss, 
    #             'avg_train_answer_loss': avg_answer_loss, 'avg_train_type_loss': avg_type_loss, 'avg_train_sp_para_loss': avg_sp_para_loss, 'avg_train_sp_sent_loss': avg_sp_sent_loss, 'lr': avg_lr
    #           }
    
    #     return {'loss': avg_loss, 'log': tensorboard_logs}


##### validation_step

In [24]:
%%add_to hotpotqa
# When the validation_step is called, the model has been put in eval mode and PyTorch gradients have been disabled. At the end of validation, model goes back to training mode and gradients are enabled.
def validation_step(self, batch, batch_nb):
    print("validation_step")
#     print("batch_nb: ", batch_nb)
    input_ids, input_mask, segment_ids, subword_starts, subword_ends, q_type, sp_sent, sp_para, qid, answer = batch
#     input_ids, input_mask, segment_ids, subword_starts, subword_ends, q_type, qid, answer = batch
    print("qid: ", qid)
#     print("q_type: ", q_type)
    output = self.forward(input_ids, input_mask, segment_ids, subword_starts, subword_ends, q_type, sp_sent, sp_para)
#     answer_loss, type_loss, start_logits, end_logits, type_logits = output 
    answer_loss, type_loss, sp_para_loss, sp_sent_loss, start_logits, end_logits, type_logits, sp_para_output, sp_sent_output = output 
    loss = answer_loss + 5*type_loss + 10*sp_para_loss + 10*sp_sent_loss

#     if(q_type.item() != -1 ):
    answers_pred, sp_sent_pred, sp_para_pred = self.decode(input_ids, start_logits, end_logits, type_logits, sp_para_output, sp_sent_output)
#         answers_pred  = self.decode(input_ids, start_logits, end_logits, type_logits)
#     else:
#         answers_pred  = [{'text': '', 'score': -1000000, 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': 1}]

    if(len(answers_pred) != 1):
        print("len(answers_pred) != 1")
        assert(len(answers_pred) == 1)

    pre_answer_score = answers_pred[0]['score']  # (start_logit + end_logit + p_type_score) / 3
    pre_answer = _normalize_text(answers_pred[0]['text'])
#         print("pred answer_score: " + str(pre_answer_score))
#         print("pred answer_text: " + str(pre_answer)) 

    gold_answer = _normalize_text(answer)
    f1, prec, recall = self.f1_score(pre_answer, gold_answer)
    em = self.exact_match_score(pre_answer, gold_answer) 
    f1 = torch.tensor(f1).type_as(loss)
    prec = torch.tensor(prec).type_as(loss)
    recall = torch.tensor(recall).type_as(loss)
    em = torch.tensor(em).type_as(loss)
#         print("f1: " + str(f1))
#         print("prec: " + str(prec))
#         print("recall: " + str(recall))
#         print("em: " + str(em))  

    if(len(sp_sent_pred) > 0):
        sp_sent_em, sp_sent_precision, sp_sent_recall, sp_sent_f1 = self.sp_metrics(sp_sent_pred, torch.where(sp_sent.squeeze())[0].tolist())
        sp_sent_em = torch.tensor(sp_sent_em).type_as(loss)
        sp_sent_precision = torch.tensor(sp_sent_precision).type_as(loss)
        sp_sent_recall = torch.tensor(sp_sent_recall).type_as(loss)
        sp_sent_f1 = torch.tensor(sp_sent_f1).type_as(loss)

#         print("sp_sent_em: " + str(sp_sent_em))
#         print("sp_sent_precision: " + str(sp_sent_precision))
#         print("sp_sent_recall: " + str(sp_sent_recall))    
#         print("sp_sent_f1: " + str(sp_sent_f1))    

        joint_prec = prec * sp_sent_precision
        joint_recall = recall * sp_sent_recall
        if joint_prec + joint_recall > 0:
            joint_f1 = 2 * joint_prec * joint_recall / (joint_prec + joint_recall)
        else:
            joint_f1 = torch.tensor(0.0).type_as(loss)
        joint_em = em * sp_sent_em 

    else:
        sp_sent_em, sp_sent_precision, sp_sent_recall, sp_sent_f1 = torch.tensor(0.0).type_as(loss), torch.tensor(0.0).type_as(loss), torch.tensor(0.0).type_as(loss), torch.tensor(0.0).type_as(loss)
        joint_em, joint_f1, joint_prec, joint_recall =  torch.tensor(0.0).type_as(loss), torch.tensor(0.0).type_as(loss), torch.tensor(0.0).type_as(loss), torch.tensor(0.0).type_as(loss)


    return { 'vloss': loss, 'answer_loss': answer_loss, 'type_loss': type_loss, 
            'sp_para_loss': sp_para_loss, 'sp_sent_loss': sp_sent_loss,
               'answer_score': pre_answer_score, 'f1': f1, 'prec':prec, 'recall':recall, 'em': em ,
              'sp_em': sp_sent_em, 'sp_f1': sp_sent_f1, 'sp_prec': sp_sent_precision, 'sp_recall': sp_sent_recall,
              'joint_em': joint_em, 'joint_f1': joint_f1, 'joint_prec': joint_prec, 'joint_recall': joint_recall

    }


###### decode

In [25]:
%%add_to hotpotqa
def decode(self, input_ids, start_logits, end_logits, type_logits, sp_para_logits, sp_sent_logits):
    print("decode")

    
    # answer decode
    question_end_index = self._get_special_index(input_ids, [QUESTION_END]) 
    # one example per batch
    start_logits = start_logits.squeeze()
    end_logits = end_logits.squeeze()
#     print("start_logits: ", start_logits)
#     print("end_logits: ", end_logits)
    start_logits_indices = start_logits.topk(k=min(self.args.n_best_size, start_logits.size(0)), dim=-1).indices 
    end_logits_indices = end_logits.topk(k=min(self.args.n_best_size, end_logits.size(0)), dim=-1).indices 
    if(len(start_logits_indices.size()) > 1):
        print("len(start_logits_indices.size()): ", len(start_logits_indices.size()))
        assert("len(start_logits_indices.size()) > 1")
    p_type = torch.argmax(type_logits, dim=1).item()
    p_type_score = torch.max(type_logits, dim=1)[0] 
#     print("type_logits: ", type_logits)
    print("p_type: ", p_type)
#     print("p_type_score: ", p_type_score)

    answers = []
    if p_type == 0:
        potential_answers = []
        for start_logit_index in start_logits_indices: 
            for end_logit_index in end_logits_indices: 
                if start_logit_index <= question_end_index.item():
                    continue
                if end_logit_index <= question_end_index.item():
                    continue
                if start_logit_index > end_logit_index:
                    continue
                answer_len = end_logit_index - start_logit_index + 1
                if answer_len > self.args.max_answer_length:
                    continue
                potential_answers.append({'start': start_logit_index, 'end': end_logit_index,
                                          'start_logit': start_logits[start_logit_index],  # single logit score for start position at start_logit_index
                                          'end_logit': end_logits[end_logit_index]})    
        sorted_answers = sorted(potential_answers, key=lambda x: (x['start_logit'] + x['end_logit']), reverse=True) 
#             print("sorted_answers: " + str(sorted_answers))

        if len(sorted_answers) == 0:
            answers.append({'text': 'NoAnswerFound', 'score': -1000000, 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': p_type_score})
        else:
            answer = sorted_answers[0]
            answer_token_ids = input_ids[0, answer['start']: answer['end'] + 1]

            answer_tokens = self.tokenizer.convert_ids_to_tokens(answer_token_ids.tolist())

            # remove [/sent], <t> and </t>

            for special_token in [SENT_MARKER_END, TITLE_START, TITLE_END, self.tokenizer.sep_token]:
                try:
                    if(answer_tokens[0] == special_token):
                        answer['start_logit'] = -2000000
                    elif(answer_tokens[-1] == special_token):
                        answer['end_logit'] = -2000000

                    answer_tokens.remove(special_token)
                except:
                    pass

            text = self.tokenizer.convert_tokens_to_string(answer_tokens) 
            score = (answer['start_logit'] + answer['end_logit'] + p_type_score) / 3
            answers.append({'text': text, 'score': score, 'start_logit': answer['start_logit'], 'end_logit': answer['end_logit'], 'p_type_score': p_type_score})

    elif p_type == 1: 
        answers.append({'text': 'yes', 'score': p_type_score, 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': p_type_score})
    elif p_type == 2:
        answers.append({'text': 'no', 'score': p_type_score, 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': p_type_score}) 
    else:
        assert False 
    
    print('answers: ', answers)
    
    # sp decode
    sent_indexes = self._get_special_index(input_ids, [SENT_MARKER_END])
    para_indexes = self._get_special_index(input_ids, [TITLE_START])

    s_to_p_map = []   
    for s in sent_indexes:
        s_to_p = torch.where(torch.le(para_indexes, s))[0][-1]     # last para_index smaller or equal to s
        s_to_p_map.append(s_to_p.item())  # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 4, 5, 5, 5, 6, 7, 7, 7, 7, 8, 9]
    print('s_to_p_map: ', s_to_p_map)
    if(len(s_to_p_map)>0):      # https://arxiv.org/pdf/2004.06753.pdf section 3.3
        para_sent_logits_sum = torch.tensor([], device=sp_sent_logits.device)  
        evidence_candidates = {}
        para_sents_offset = [0]
        for i in range(s_to_p_map[-1]+1):
            para_sent_logits = torch.masked_select(sp_sent_logits.squeeze(), torch.tensor([p==i for p in s_to_p_map])) 
            print("para_sent_logits: " + str(para_sent_logits))
            para_sent_logits_sum = torch.cat([para_sent_logits_sum, torch.sum(para_sent_logits).unsqueeze(0) ])
            print("para_sent_logits_sum: " + str(para_sent_logits_sum))
            para_sents_offset.append(para_sent_logits.numel()+para_sents_offset[-1])  # [0, 21, 22, 24, 25, 26, 29, 30, 34, 35, 36], one more elements than num of paras   
            print("para_sents_offset: " + str(para_sents_offset))
            evidence_candidates[i] = torch.gt(para_sent_logits, 0.1).nonzero(as_tuple=True)[0]  # 0.1 is the threshold to be a candidate sentences
            print("evidence_candidates: " + str(evidence_candidates))
        # para_sent_logits_sum: tensor([ 7.8180e-01,  6.8700e-02,  1.6170e-01,  7.4000e-02,  6.0000e-04,  2.2680e-01, -3.0400e-02,  9.3400e-02,  1.1200e-01,  1.2470e-01])
        # evidence_candidates: sentences with logits larger than threshold in each para,  [tensor([ 1,  2,  4,  5,  6,  7,  8,  9, 10, 11, 13, 14, 16, 17, 19, 20]), tensor([0]), tensor([0, 1]), tensor([0]), tensor([0]), tensor([0, 1, 2]), tensor([], dtype=torch.int64), tensor([0, 2]), tensor([0]), tensor([0])]
        sp_para_pred = para_sent_logits_sum.squeeze().topk(k=min(para_sent_logits_sum.numel(), 2)).indices  # sp are from <=2 paragraphs
        print("sp_para_pred: " + str(sp_para_pred))
        sp_sent_pred = []
        if(sp_para_pred.numel() > 1):
            for para_idx in sp_para_pred: 
                if(para_idx.item() in evidence_candidates):
                    sp_sent_pred.extend([(para_sents_offset[para_idx]+sent).item() for sent in evidence_candidates[para_idx.item()]]) 
        elif(sp_para_pred.numel()==1 and sp_para_pred.item() in evidence_candidates):
            sp_sent_pred = [(para_sents_offset[sp_para_pred]+sent).item() for sent in evidence_candidates[sp_para_pred.item()]]
    else:
        sp_sent_pred = []
        sp_para_pred = [] 
    return (answers, sp_sent_pred, sp_para_pred)

###### metrics

In [26]:
%%add_to hotpotqa


def f1_score(self, prediction, ground_truth):
    normalized_prediction = _normalize_text(prediction)
    normalized_ground_truth = _normalize_text(ground_truth)
    ZERO_METRIC = (0, 0, 0)

    if normalized_prediction in ['yes', 'no', 'noanswer'] and normalized_prediction != normalized_ground_truth:
        return ZERO_METRIC
    if normalized_ground_truth in ['yes', 'no', 'noanswer'] and normalized_prediction != normalized_ground_truth:
        return ZERO_METRIC

    prediction_tokens = normalized_prediction.split()
    ground_truth_tokens = normalized_ground_truth.split()
    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return ZERO_METRIC
    precision = 1.0 * num_same / len(prediction_tokens)
    recall = 1.0 * num_same / len(ground_truth_tokens)
    f1 = (2 * precision * recall) / (precision + recall)
    return f1, precision, recall


def exact_match_score(self, prediction, ground_truth):
    return int(_normalize_text(prediction) == _normalize_text(ground_truth))


def sp_metrics(self, prediction, gold): 
    tp, fp, fn = 0, 0, 0
    for e in prediction:
        if e in gold:
            tp += 1
        else:
            fp += 1 
    for e in gold:
        if e not in prediction:
            fn += 1 
    prec = 1.0 * tp / (tp + fp) if tp + fp > 0 else 0.0
    recall = 1.0 * tp / (tp + fn) if tp + fn > 0 else 0.0
    f1 = 2 * prec * recall / (prec + recall) if prec + recall > 0 else 0.0
    em = 1.0 if fp + fn == 0 else 0.0 
    return em, prec, recall, f1 



##### validation_end

In [27]:
%%add_to hotpotqa
# If a validation_step is not defined, this won't be called. Called at the end of the validation loop with the outputs of validation_step.
def validation_epoch_end(self, outputs):
    print("validation_epoch_end")
    avg_loss = torch.stack([x['vloss'] for x in outputs]).mean()  
    avg_answer_loss = torch.stack([x['answer_loss'] for x in outputs]).mean()  
    avg_type_loss = torch.stack([x['type_loss'] for x in outputs]).mean()  
    avg_sp_para_loss = torch.stack([x['sp_para_loss'] for x in outputs]).mean()  
    avg_sp_sent_loss = torch.stack([x['sp_sent_loss'] for x in outputs]).mean()  


    answer_scores = [x['answer_score'] for x in outputs] 
    f1_scores = [x['f1'] for x in outputs]  
    em_scores = [x['em'] for x in outputs]  
    prec_scores =  [x['prec'] for x in outputs] 
    recall_scores = [x['recall'] for x in outputs]  
    sp_sent_f1_scores = [x['sp_f1'] for x in outputs]   
    sp_sent_em_scores = [x['sp_em'] for x in outputs]   
    sp_sent_prec_scores = [x['sp_prec'] for x in outputs]   
    sp_sent_recall_scores = [x['sp_recall'] for x in outputs]   
    joint_f1_scores = [x['joint_f1'] for x in outputs]  
    joint_em_scores = [x['joint_em'] for x in outputs]  
    joint_prec_scores = [x['joint_prec'] for x in outputs]  
    joint_recall_scores = [x['joint_recall'] for x in outputs]



    print(f'before sync --> sizes:  {len(answer_scores)}, {len(f1_scores)}, {len(em_scores)}')
    if self.trainer.use_ddp:
        torch.distributed.all_reduce(avg_loss, op=torch.distributed.ReduceOp.SUM)
        avg_loss /= self.trainer.world_size 
        torch.distributed.all_reduce(avg_answer_loss, op=torch.distributed.ReduceOp.SUM)
        avg_answer_loss /= self.trainer.world_size 
        torch.distributed.all_reduce(avg_type_loss, op=torch.distributed.ReduceOp.SUM)
        avg_type_loss /= self.trainer.world_size 
        torch.distributed.all_reduce(avg_sp_para_loss, op=torch.distributed.ReduceOp.SUM)
        avg_sp_para_loss /= self.trainer.world_size 
        torch.distributed.all_reduce(avg_sp_sent_loss, op=torch.distributed.ReduceOp.SUM)
        avg_sp_sent_loss /= self.trainer.world_size 

        answer_scores = self.sync_list_across_gpus(answer_scores, avg_loss.device, torch.float)
        f1_scores = self.sync_list_across_gpus(f1_scores, avg_loss.device, torch.float)
        em_scores = self.sync_list_across_gpus(em_scores, avg_loss.device, torch.float)
        prec_scores = self.sync_list_across_gpus(prec_scores, avg_loss.device, torch.float)
        recall_scores = self.sync_list_across_gpus(recall_scores, avg_loss.device, torch.float)

        sp_sent_f1_scores = self.sync_list_across_gpus(sp_sent_f1_scores, avg_loss.device, torch.float)
        sp_sent_em_scores = self.sync_list_across_gpus(sp_sent_em_scores, avg_loss.device, torch.float)
        sp_sent_prec_scores = self.sync_list_across_gpus(sp_sent_prec_scores, avg_loss.device, torch.float)
        sp_sent_recall_scores = self.sync_list_across_gpus(sp_sent_recall_scores, avg_loss.device, torch.float)

        joint_f1_scores = self.sync_list_across_gpus(joint_f1_scores, avg_loss.device, torch.float)
        joint_em_scores = self.sync_list_across_gpus(joint_em_scores, avg_loss.device, torch.float)
        joint_prec_scores = self.sync_list_across_gpus(joint_prec_scores, avg_loss.device, torch.float)
        joint_recall_scores = self.sync_list_across_gpus(joint_recall_scores, avg_loss.device, torch.float)


    print(f'after sync --> sizes: {len(answer_scores)}, {len(f1_scores)}, {len(em_scores)}')

    avg_val_f1 = sum(f1_scores) / len(f1_scores)
    avg_val_em = sum(em_scores) / len(em_scores)
    avg_val_prec = sum(prec_scores) / len(prec_scores)
    avg_val_recall = sum(recall_scores) / len(recall_scores)
    avg_val_sp_sent_f1 = torch.tensor(sum(sp_sent_f1_scores) / len(sp_sent_f1_scores) ).type_as(avg_loss)   
    avg_val_sp_sent_em = torch.tensor(sum(sp_sent_em_scores) / len(sp_sent_em_scores) ).type_as(avg_loss)    
    avg_val_sp_sent_prec = torch.tensor(sum(sp_sent_prec_scores) / len(sp_sent_prec_scores) ).type_as(avg_loss)   
    avg_val_sp_sent_recall = torch.tensor(sum(sp_sent_recall_scores) / len(sp_sent_recall_scores) ).type_as(avg_loss)    
    avg_val_joint_f1 = torch.tensor(sum(joint_f1_scores) / len(joint_f1_scores) ).type_as(avg_loss)  
    avg_val_joint_em = torch.tensor(sum(joint_em_scores) / len(joint_em_scores) ).type_as(avg_loss)  
    avg_val_joint_prec = torch.tensor(sum(joint_prec_scores) / len(joint_prec_scores) ).type_as(avg_loss)   
    avg_val_joint_recall = torch.tensor(sum(joint_recall_scores) / len(joint_recall_scores) ).type_as(avg_loss) 

    print("avg_loss: ", avg_loss, end = '\t')   
    print("avg_answer_loss: ", avg_answer_loss, end = '\t') 
    print("avg_type_loss: ", avg_type_loss, end = '\t') 
    # print("avg_sp_para_loss: ", avg_sp_para_loss, end = '\t')   
    # print("avg_sp_sent_loss: ", avg_sp_sent_loss)   
    print("avg_val_f1: ", avg_val_f1, end = '\t')   
    print("avg_val_em: ", avg_val_em, end = '\t')   
    print("avg_val_prec: ", avg_val_prec, end = '\t')   
    print("avg_val_recall: ", avg_val_recall)   
    print("avg_val_sp_sent_f1: ", avg_val_sp_sent_f1, end = '\t')   
    print("avg_val_sp_sent_em: " , avg_val_sp_sent_em, end = '\t')  
    print("avg_val_sp_sent_prec: ", avg_val_sp_sent_prec, end = '\t')   
    print("avg_val_sp_sent_recall: ", avg_val_sp_sent_recall)   
    print("avg_val_joint_f1: " , avg_val_joint_f1, end = '\t')  
    print("avg_val_joint_em: ", avg_val_joint_em, end = '\t')   
    print("avg_val_joint_prec: ", avg_val_joint_prec, end = '\t')   
    print("avg_val_joint_recall: ", avg_val_joint_recall)   


    logs = {'avg_val_loss': avg_loss, 'avg_val_answer_loss': avg_answer_loss, 'avg_val_type_loss': avg_type_loss, 
        'avg_val_sp_para_loss': avg_sp_para_loss, 'avg_val_sp_sent_loss': avg_sp_sent_loss,   
        'avg_val_f1': avg_val_f1 , 'avg_val_em': avg_val_em,  'avg_val_prec': avg_val_prec, 'avg_val_recall': avg_val_recall ,    
        'avg_val_sp_sent_f1': avg_val_sp_sent_f1, 'avg_val_sp_sent_em': avg_val_sp_sent_em,  'avg_val_sp_sent_prec': avg_val_sp_sent_prec, 'avg_val_sp_sent_recall': avg_val_sp_sent_recall,    
        'avg_val_joint_f1': avg_val_joint_f1, 'avg_val_joint_em': avg_val_joint_em,  'avg_val_joint_prec': avg_val_joint_prec, 'avg_val_joint_recall': avg_val_joint_recall 
    }   

    return logs


def sync_list_across_gpus(self, l, device, dtype):
    l_tensor = torch.tensor(l, device=device, dtype=dtype)
    gather_l_tensor = [torch.ones_like(l_tensor) for _ in range(self.trainer.world_size)]
    torch.distributed.all_gather(gather_l_tensor, l_tensor)
    return torch.cat(gather_l_tensor).tolist()

##### test_step

In [28]:
%%add_to hotpotqa
def test_step(self, batch, batch_nb):
    input_ids, input_mask, segment_ids, subword_starts, subword_ends, q_type, sp_sent, sp_para, qid, answer = batch
#     input_ids, input_mask, segment_ids, subword_starts, subword_ends, q_type, qid, answer = batch

    print("test_step of qid: ", qid, end="\t") 
    output = self.forward(input_ids, input_mask, segment_ids, subword_starts, subword_ends, q_type, sp_sent, sp_para)
    answer_loss, type_loss, sp_para_loss, sp_sent_loss, start_logits, end_logits, type_logits, sp_para_output, sp_sent_output = output 
#     answer_loss, type_loss, start_logits, end_logits, type_logits = output 
    loss = answer_loss + 5*type_loss + 10*sp_para_loss + 10*sp_sent_loss

    answers_pred, sp_sent_pred, sp_para_pred = self.decode(input_ids, start_logits, end_logits, type_logits, sp_para_output, sp_sent_output)
#     answers_pred = self.decode(input_ids, start_logits, end_logits, type_logits)

    if(len(answers_pred) != 1):
        print("len(answers_pred) != 1")
        assert(len(answers_pred) == 1)

    pre_answer_score = answers_pred[0]['score']  # (start_logit + end_logit + p_type_score) / 3
    start_logit = answers_pred[0]['start_logit']
    end_logit = answers_pred[0]['end_logit']
    type_score = answers_pred[0]['p_type_score']
    pre_answer = _normalize_text(answers_pred[0]['text'])
    # print("pred answer_score: " + str(pre_answer_score))
    # print("pred answer_text: " + str(pre_answer)) 

    gold_answer = _normalize_text(answer)
    f1, prec, recall = self.f1_score(pre_answer, gold_answer)
    em = self.exact_match_score(pre_answer, gold_answer) 
    f1 = torch.tensor(f1).type_as(loss)
    prec = torch.tensor(prec).type_as(loss)
    recall = torch.tensor(recall).type_as(loss)
    em = torch.tensor(em).type_as(loss)

    print("pre_answer:\t", pre_answer, "\tgold_answer:\t", gold_answer) #, "\tstart_logits:\t", start_logits.cpu(), "\tend_logits:\t", end_logits.cpu(), "\ttype_logits:\t", type_logits.cpu())

    self.logger.log_metrics({'answer_loss': answer_loss, 'type_loss': type_loss, 
                                'answer_score': pre_answer_score, 'start_logit': start_logit, 'end_logit': end_logit,  
                                'type_score': type_score,
                                'f1': f1, 'prec':prec, 'recall':recall, 'em': em 
                            }) 


    return { 'vloss': loss, 'answer_loss': answer_loss, 'type_loss': type_loss, 
             'answer_score': pre_answer_score, 'start_logit': start_logit, 'end_logit': end_logit, 'type_score': type_score,
             'f1': f1, 'prec':prec, 'recall':recall, 'em': em,
            'sp_para_loss': sp_para_loss, 'sp_sent_loss': sp_sent_loss, 
            }



##### test_end

In [29]:
%%add_to hotpotqa
def test_epoch_end(self, outputs):
    print("test_epoch_end")
    avg_loss = torch.stack([x['vloss'] for x in outputs]).mean()  
    avg_answer_loss = torch.stack([x['answer_loss'] for x in outputs]).mean()  
    avg_type_loss = torch.stack([x['type_loss'] for x in outputs]).mean()  
    avg_sp_para_loss = torch.stack([x['sp_para_loss'] for x in outputs]).mean()  
    avg_sp_sent_loss = torch.stack([x['sp_sent_loss'] for x in outputs]).mean()  

    answer_scores = [x['answer_score'] for x in outputs]  # [item for sublist in outputs for item in sublist['answer_score']] #torch.stack([x['answer_score'] for x in outputs]).mean() # 
    f1_scores = [x['f1'] for x in outputs]  
    em_scores = [x['em'] for x in outputs]  
    prec_scores =  [x['prec'] for x in outputs] 
    recall_scores = [x['recall'] for x in outputs]  

    print(f'before sync --> sizes:  {len(answer_scores)}')
    if self.trainer.use_ddp:
        torch.distributed.all_reduce(avg_loss, op=torch.distributed.ReduceOp.SUM)
        avg_loss /= self.trainer.world_size 
        torch.distributed.all_reduce(avg_answer_loss, op=torch.distributed.ReduceOp.SUM)
        avg_answer_loss /= self.trainer.world_size 
        torch.distributed.all_reduce(avg_type_loss, op=torch.distributed.ReduceOp.SUM)
        avg_type_loss /= self.trainer.world_size 
        torch.distributed.all_reduce(avg_sp_para_loss, op=torch.distributed.ReduceOp.SUM)
        avg_sp_para_loss /= self.trainer.world_size 
        torch.distributed.all_reduce(avg_sp_sent_loss, op=torch.distributed.ReduceOp.SUM)
        avg_sp_sent_loss /= self.trainer.world_size 
        answer_scores = self.sync_list_across_gpus(answer_scores, avg_loss.device, torch.float)
        f1_scores = self.sync_list_across_gpus(f1_scores, avg_loss.device, torch.float)
        em_scores = self.sync_list_across_gpus(em_scores, avg_loss.device, torch.float)
        prec_scores = self.sync_list_across_gpus(prec_scores, avg_loss.device, torch.float)
        recall_scores = self.sync_list_across_gpus(recall_scores, avg_loss.device, torch.float)
#         int_qids = self.sync_list_across_gpus(int_qids, avg_loss.device, torch.int)
        answer_scores = self.sync_list_across_gpus(answer_scores, avg_loss.device, torch.float)


    print(f'after sync --> sizes: {len(answer_scores)}')
    print("answer_scores: ", answer_scores)
    avg_test_f1 = sum(f1_scores) / len(f1_scores)
    avg_test_em = sum(em_scores) / len(em_scores)
    avg_test_prec = sum(prec_scores) / len(prec_scores)
    avg_test_recall = sum(recall_scores) / len(recall_scores)     
    print("avg_loss: ", avg_loss, end = '\t') 
    print("avg_answer_loss: ", avg_answer_loss, end = '\t') 
    print("avg_type_loss: ", avg_type_loss, end = '\t') 
    print("avg_sp_para_loss: ", avg_sp_para_loss, end = '\t') 
    print("avg_sp_sent_loss: ", avg_sp_sent_loss, end = '\t')  

    logs = {'avg_test_loss': avg_loss, 'avg_test_answer_loss': avg_answer_loss, 'avg_test_type_loss': avg_type_loss, 
            'avg_test_f1': avg_test_f1 , 'avg_test_em': avg_test_em,  'avg_test_prec': avg_test_prec, 'avg_test_recall': avg_test_recall ,    
            'avg_val_sp_para_loss': avg_sp_para_loss, 'avg_val_sp_sent_loss': avg_sp_sent_loss
           }

    return {'avg_test_loss': avg_loss, 'log': logs}

##### add_model_specific_args

In [30]:
%%add_to hotpotqa
@staticmethod
def add_model_specific_args(parser, root_dir):
    parser.add_argument("--save_dir", type=str, default='jupyter-hotpotqa')
    parser.add_argument("--save_prefix", type=str, required=True)
    parser.add_argument("--train_dataset", type=str, required=False, help="Path to the training squad-format")
    parser.add_argument("--dev_dataset", type=str, required=True, help="Path to the dev squad-format")
    parser.add_argument("--batch_size", type=int, default=2, help="Batch size")
    parser.add_argument("--gpus", type=str, default='0',
                        help="Comma separated list of gpus. Default is gpu 0. To use CPU, use --gpus "" ")
    parser.add_argument("--warmup", type=int, default=1000, help="Number of warmup steps")
    parser.add_argument("--lr", type=float, default=0.00005, help="Maximum learning rate")
    parser.add_argument("--val_every", type=float, default=1.0, help="How often within one training epoch to check the validation set.")
    parser.add_argument("--val_percent_check", default=1.00, type=float, help='Percent of validation data used')
    parser.add_argument("--num_workers", type=int, default=4, help="Number of data loader workers")
    parser.add_argument("--seed", type=int, default=1234, help="Seed")
    parser.add_argument("--epochs", type=int, default=6, help="Number of epochs")
    parser.add_argument("--max_seq_len", type=int, default=4096,
                        help="Maximum length of seq passed to the transformer model")
    parser.add_argument("--max_doc_len", type=int, default=4096,
                        help="Maximum number of wordpieces of the input document")
    parser.add_argument("--max_num_answers", type=int, default=64,
                        help="Maximum number of answer spans per document (64 => 94%)")
    parser.add_argument("--max_question_len", type=int, default=55,
                        help="Maximum length of the question")
    parser.add_argument("--doc_stride", type=int, default=-1,
                        help="Overlap between document chunks. Use -1 to only use the first chunk")
    parser.add_argument("--ignore_seq_with_no_answers", action='store_true',
                        help="each example should have at least one answer. Default is False")
    parser.add_argument("--disable_checkpointing", action='store_true', help="No logging or checkpointing")
    parser.add_argument("--n_best_size", type=int, default=20,
                        help="Number of answer candidates. Used at decoding time")
    parser.add_argument("--max_answer_length", type=int, default=30,
                        help="maximum num of wordpieces/answer. Used at decoding time")
    parser.add_argument("--regular_softmax_loss", action='store_true', help="IF true, use regular softmax. Default is using ORed softmax loss")
    parser.add_argument("--test", action='store_true', help="Test only, no training")
    parser.add_argument("--model_path", type=str,
                        help="Path to the checkpoint directory")
    parser.add_argument("--no_progress_bar", action='store_true', help="no progress bar. Good for printing")
    parser.add_argument("--attention_mode", type=str, choices=['tvm', 'sliding_chunks'],
                        default='sliding_chunks', help='Which implementation of selfattention to use')
    parser.add_argument("--fp32", action='store_true', help="default is fp16. Use --fp32 to switch to fp32")
    parser.add_argument('--train_percent', type=float, default=1.0)
    return parser

##### class info

In [31]:
dir(hotpotqa)

['CHECKPOINT_HYPER_PARAMS_KEY',
 'CHECKPOINT_HYPER_PARAMS_NAME',
 'CHECKPOINT_HYPER_PARAMS_TYPE',
 'T_destination',
 '_LightningModule__get_hparams_assignment_variable',
 '__abstractmethods__',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_cache',
 '_abc_negative_cache',
 '_abc_negative_cache_version',
 '_abc_registry',
 '_apply',
 '_auto_collect_arguments',
 '_call_impl',
 '_forward_unimplemented',
 '_get_name',
 '_get_special_index',
 '_init_slurm_connection',
 '_load_from_state_dict',
 '_load_model_state',
 '_named_members',
 '_register_load_state_dict_pre_hook',
 '_register_state_dict_hook',


In [32]:
from inspect import getmembers, isfunction
getmembers(hotpotqa)

[('CHECKPOINT_HYPER_PARAMS_KEY', 'hyper_parameters'),
 ('CHECKPOINT_HYPER_PARAMS_NAME', 'hparams_name'),
 ('CHECKPOINT_HYPER_PARAMS_TYPE', 'hparams_type'),
 ('T_destination', ~T_destination),
 ('_LightningModule__get_hparams_assignment_variable',
  <function pytorch_lightning.core.lightning.LightningModule.__get_hparams_assignment_variable(self)>),
 ('__abstractmethods__', frozenset()),
 ('__annotations__',
  {'_device': Ellipsis, '_dtype': typing.Union[str, torch.dtype]}),
 ('__call__',
  <function torch.nn.modules.module.Module._call_impl(self, *input, **kwargs)>),
 ('__class__', abc.ABCMeta),
 ('__delattr__',
  <function torch.nn.modules.module.Module.__delattr__(self, name)>),
 ('__dict__',
  mappingproxy({'__module__': '__main__',
                '__init__': <function __main__.hotpotqa.__init__(self, args)>,
                'load_model': <function __main__.hotpotqa.load_model(self)>,
                'train_dataloader': <function pytorch_lightning.core.decorators.data_loader.<local

In [33]:
functions_list = [o for o in getmembers(hotpotqa) if isfunction(o[1])]
functions_list

[('_LightningModule__get_hparams_assignment_variable',
  <function pytorch_lightning.core.lightning.LightningModule.__get_hparams_assignment_variable(self)>),
 ('__call__',
  <function torch.nn.modules.module.Module._call_impl(self, *input, **kwargs)>),
 ('__delattr__',
  <function torch.nn.modules.module.Module.__delattr__(self, name)>),
 ('__dir__', <function torch.nn.modules.module.Module.__dir__(self)>),
 ('__getattr__',
  <function torch.nn.modules.module.Module.__getattr__(self, name:str) -> Union[torch.Tensor, _ForwardRef('Module')]>),
 ('__init__', <function __main__.hotpotqa.__init__(self, args)>),
 ('__repr__', <function torch.nn.modules.module.Module.__repr__(self)>),
 ('__setattr__',
  <function torch.nn.modules.module.Module.__setattr__(self, name:str, value:Union[torch.Tensor, _ForwardRef('Module')]) -> None>),
 ('__setstate__',
  <function torch.nn.modules.module.Module.__setstate__(self, state)>),
 ('_apply', <function torch.nn.modules.module.Module._apply(self, fn)>),


In [34]:
import inspect
inspect.getmro(hotpotqa)  # a hierarchy of classes 

(__main__.hotpotqa,
 pytorch_lightning.core.lightning.LightningModule,
 abc.ABC,
 pytorch_lightning.utilities.device_dtype_mixin.DeviceDtypeModuleMixin,
 pytorch_lightning.core.grads.GradInformation,
 pytorch_lightning.core.saving.ModelIO,
 pytorch_lightning.core.hooks.ModelHooks,
 torch.nn.modules.module.Module,
 object)

In [35]:
help(hotpotqa.configure_optimizers)

Help on function configure_optimizers in module __main__:

configure_optimizers(self)



In [36]:
# import inspect
# code, line_no = inspect.getsourcelines(hotpotqa.training_step)
# print(''.join(code))

### main

In [37]:
def main(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)
    if not args.test:     # if it needs to train, remove exsiting folder
        import shutil
        save_folder = os.path.join(args.save_dir, args.save_prefix)
        if os.path.exists(save_folder):
            shutil.rmtree(save_folder, ignore_errors=True)  #delete non-empty folder 
        
    import shutil
    save_folder = os.path.join(args.save_dir, args.save_prefix)
    if os.path.exists(save_folder):
        shutil.rmtree(save_folder, ignore_errors=True)  #delete non-empty folder


In [40]:
    hotpotqa.__abstractmethods__=set()   # without this, got an error "Can't instantiate abstract class hotpotqa with abstract methods" if these two abstract methods are not implemented in the same cell where class hotpotqa defined 
    model = hotpotqa(args)
#     model.to('cuda')    # this is necessary to use gpu
    

self.args.model_path:  /xdisk/msurdeanu/fanluo/hotpotQA/longformer-base-4096
Loaded model with config:
LongformerConfig {
  "attention_dilation": [
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1
  ],
  "attention_mode": "sliding_chunks",
  "attention_probs_dropout_prob": 0.1,
  "attention_window": [
    256,
    256,
    256,
    256,
    256,
    256,
    256,
    256,
    256,
    256,
    256,
    256
  ],
  "autoregressive": false,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "ignore_attention_mask": false,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 4098,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "type_vocab_size": 1,
  "vocab_size": 50265
}



In [41]:
    logger = TestTubeLogger( # The TestTubeLogger adds a nicer folder structure to manage experiments and snapshots all hyperparameters you pass to a LightningModule.
        save_dir=args.save_dir,
        name=args.save_prefix,
        version=0  # always use version=0
    )

In [42]:
    checkpoint_callback = ModelCheckpoint(
        filepath=os.path.join(args.save_dir, args.save_prefix, "checkpoints"),
        save_top_k=5,
        verbose=True,
        monitor='avg_val_f1',
        mode='max',
        prefix=''
    )

In [43]:
    train_set_size = 9 * args.train_percent # 90447 * args.train_percent   # hardcode dataset size. Needed to compute number of steps for the lr scheduler
    print("train_set_size: ", train_set_size) 

    args.gpus = [int(x) for x in args.gpus.split(',')] if args.gpus!='' else None
    num_devices = 1 or len(args.gpus)
    print("num_devices: ", num_devices)

    train_set_size = 90447 * args.train_percent    # hardcode dataset size. Needed to compute number of steps for the lr scheduler
    args.steps = args.epochs * train_set_size / (args.batch_size * num_devices)

    print(f'>>>>>>> #train_set_size: {train_set_size}, #steps: {args.steps},  #warmup steps: {args.warmup}, #epochs: {args.epochs}, batch_size: {args.batch_size * num_devices} <<<<<<<')

train_set_size:  9.0
num_devices:  1
>>>>>>> #train_set_size: 90447.0, #steps: 271341.0,  #warmup steps: 1000, #epochs: 6, batch_size: 2 <<<<<<<


In [44]:
    trainer = pl.Trainer(gpus=args.gpus, distributed_backend='ddp', # if args.gpus and (len(args.gpus) > 1) else None,
                             track_grad_norm=-1, max_epochs=args.epochs, early_stop_callback=None,
                             accumulate_grad_batches=args.batch_size,
                             train_percent_check = args.train_percent,
        #                          val_check_interval=args.val_every,
                             val_percent_check=args.val_percent_check,
                             test_percent_check=args.val_percent_check,
                             logger=logger if not args.disable_checkpointing else False,
                             checkpoint_callback=checkpoint_callback if not args.disable_checkpointing else False,
                             show_progress_bar=args.no_progress_bar,
                             use_amp=not args.fp32, 
                             amp_level='O2',
#                              check_val_every_n_epoch=1
                             )


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Multi-processing is handled by Slurm.
CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


In [None]:
#     if not args.test: 
trainer.fit(model)


initializing ddp: GLOBAL_RANK: 0, MEMBER: 1/1
----------------------------------------------------------------------------------------------------
distributed_backend=ddp
All DDP processes registered. Starting ddp with 1 processes
----------------------------------------------------------------------------------------------------
Set SLURM handle signals.

  | Name        | Type       | Params
-------------------------------------------
0 | model       | Longformer | 148 M 
1 | qa_outputs  | Linear     | 1 K   
2 | linear_type | Linear     | 2 K   
3 | fnn_sp_sent | Sequential | 591 K 
4 | fnn_sp_para | Sequential | 591 K 


reading file: small_end.json


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

validation_step
qid:  5a736f7c5542991f29ee2e18
para_indexes:  tensor([  25,  611,  650,  709,  745,  793,  933,  980, 1127, 1197],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.1436, -0.2822,  0.1904,  ..., -0.1859, -0.0976, -0.0387],
         [ 0.1160, -0.2718,  0.1098,  ..., -0.2303, -0.1094,  0.2376],
         [ 0.1845, -0.1362, -0.0262,  ..., -0.1628, -0.1756,  0.0744],
         ...,
         [ 0.1170,  0.0717,  0.1364,  ..., -0.1623, -0.3198, -0.0403],
         [ 0.1554,  0.0368,  0.0812,  ..., -0.2988, -0.1664, -0.1087],
         [ 0.0514,  0.0836, -0.1168,  ..., -0.4965, -0.3402,  0.2075]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0542],
         [ 0.0703],
         [ 0.0091],
         [ 0.0105],
         [ 0.0467],
         [ 0.0167],
         [-0.0016],
         [ 0.0778],
         [ 0.0679],
         [ 0.0358]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3921], device='cuda:0', dtype=torch.flo

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

para_indexes:  tensor([  23,  136,  255,  518,  552,  820,  924, 1043, 1120, 1204],
       device='cuda:0')
sp_para_output:  tensor([[[ 2.9386e-01,  2.8084e-01, -4.9943e-02,  ..., -1.2617e-02,
          -2.7567e-02,  1.0276e-01],
         [ 2.5099e-01,  6.7550e-02,  2.9048e-04,  ..., -7.7380e-01,
          -1.2097e-01, -2.8712e-01],
         [-4.2723e-02,  4.6758e-01, -5.2645e-01,  ..., -6.4458e-01,
          -1.9583e-01, -3.1497e-01],
         ...,
         [ 3.5275e-01,  2.7817e-02,  4.9424e-02,  ..., -7.3301e-02,
          -1.6024e-01,  5.0187e-02],
         [ 2.8701e-01, -5.5427e-02, -7.8536e-02,  ..., -4.3393e-01,
           1.4286e-01,  1.2548e-02],
         [-4.3410e-01,  4.2407e-02, -2.2555e-01,  ..., -1.0101e+00,
          -2.2413e-01,  7.6332e-02]]], device='cuda:0',
       grad_fn=<IndexBackward>)
sp_para_output_t:  tensor([[[ 0.0568],
         [ 0.0833],
         [ 0.0385],
         [-0.0071],
         [ 0.1223],
         [ 0.0200],
         [ 0.0525],
         [-0.0405],
 



para_indexes:  tensor([  13,  107,  197,  378,  541,  644,  784,  930, 1152, 1294],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.3733,  0.1723,  0.1475,  ...,  0.0497, -0.2964,  0.3080],
         [ 0.1780, -0.1823,  0.2016,  ..., -0.3821, -0.1040,  0.0339],
         [-0.0143, -0.4192, -0.1518,  ..., -0.1873, -0.1646,  0.3773],
         ...,
         [-0.1514,  0.1775,  0.1980,  ..., -0.3657, -0.1014, -0.0805],
         [ 0.1550,  0.0772,  0.2084,  ..., -0.5464, -0.5150, -0.1003],
         [ 0.2558, -0.0023,  0.1458,  ..., -0.7604, -0.1949,  0.1881]]],
       device='cuda:0', grad_fn=<IndexBackward>)
sp_para_output_t:  tensor([[[0.0682],
         [0.1259],
         [0.1053],
         [0.0726],
         [0.1344],
         [0.1655],
         [0.0198],
         [0.1289],
         [0.2450],
         [0.2123]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddBackward0>)
para_indexes:  tensor([ 21, 178, 251, 276, 378, 441, 520, 601, 706, 801], device='cuda:0')
sp_para_outpu

para_indexes:  tensor([  22,  123,  225,  306,  392,  519,  575,  803,  922, 1002],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.1794,  0.0223,  0.3305,  ..., -0.5337, -0.1641,  0.0122],
         [ 0.2092,  0.0719,  0.2108,  ..., -0.6396, -0.1058,  0.0346],
         [-0.0096,  0.0076,  0.0335,  ..., -0.5345, -0.0482,  0.0724],
         ...,
         [ 0.1440,  0.2346,  0.0802,  ..., -0.6163,  0.0340, -0.0704],
         [ 0.0306,  0.1240, -0.0118,  ..., -0.3306, -0.2396,  0.2268],
         [ 0.1180,  0.2028,  0.0294,  ..., -0.3584, -0.1586,  0.0861]]],
       device='cuda:0', grad_fn=<IndexBackward>)
sp_para_output_t:  tensor([[[ 0.1078],
         [ 0.0619],
         [ 0.0378],
         [ 0.0210],
         [ 0.0727],
         [ 0.1506],
         [ 0.0690],
         [-0.0759],
         [-0.0421],
         [ 0.0748]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddBackward0>)
para_indexes:  tensor([  31,   92,  193,  291,  328,  448,  602,  660,  983, 1154],
       dev

para_indexes:  tensor([ 19, 117, 148, 310, 424, 497, 690, 746, 834, 880], device='cuda:0')
sp_para_output:  tensor([[[-0.1661, -0.1924,  0.1328,  ..., -0.6874, -0.2461,  0.1640],
         [-0.2869, -0.1833,  0.0358,  ..., -0.6939,  0.0907,  0.0985],
         [-0.0687,  0.2780, -0.0692,  ..., -0.2913, -0.5128,  0.1335],
         ...,
         [ 0.0662, -0.1945,  0.1392,  ..., -0.2278,  0.0284,  0.1959],
         [-0.1841,  0.4041, -0.0536,  ...,  0.0549,  0.3761,  0.2300],
         [-0.1448, -0.1907,  0.2507,  ...,  0.0380, -0.3545,  0.0155]]],
       device='cuda:0', grad_fn=<IndexBackward>)
sp_para_output_t:  tensor([[[ 0.0463],
         [ 0.0685],
         [ 0.0429],
         [-0.0242],
         [-0.0481],
         [ 0.1643],
         [ 0.0316],
         [-0.0278],
         [-0.0571],
         [-0.0175]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddBackward0>)
para_indexes:  tensor([  24,  264,  335,  472,  606,  730,  846,  933, 1037, 1156],
       device='cuda:0')
sp_

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

validation_step
qid:  5a736f7c5542991f29ee2e18
para_indexes:  tensor([  25,  611,  650,  709,  745,  793,  933,  980, 1127, 1197],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.1466, -0.2742,  0.1868,  ..., -0.1593, -0.1023, -0.0465],
         [ 0.1238, -0.2670,  0.1092,  ..., -0.1934, -0.1167,  0.2339],
         [ 0.1893, -0.1316, -0.0259,  ..., -0.1334, -0.1789,  0.0664],
         ...,
         [ 0.1231,  0.0780,  0.1336,  ..., -0.1337, -0.3226, -0.0486],
         [ 0.1597,  0.0393,  0.0803,  ..., -0.2740, -0.1685, -0.1138],
         [ 0.0598,  0.0839, -0.1160,  ..., -0.4643, -0.3418,  0.1999]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0352],
         [ 0.0512],
         [-0.0070],
         [-0.0051],
         [ 0.0291],
         [-0.0033],
         [-0.0212],
         [ 0.0594],
         [ 0.0526],
         [ 0.0163]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3831], device='cuda:0', dtype=torch.flo

sp_para_output:  tensor([[[ 0.3168,  0.1556,  0.0955,  ..., -0.9299, -0.4049,  0.0824],
         [ 0.1808,  0.1286, -0.1825,  ..., -0.3106, -0.3051, -0.0791],
         [ 0.1847,  0.1924, -0.0830,  ..., -0.4556, -0.1999, -0.0623],
         ...,
         [ 0.1172,  0.1337,  0.0130,  ..., -0.1573, -0.2940, -0.2942],
         [ 0.1212,  0.0404, -0.0370,  ..., -0.2186, -0.1563, -0.0930],
         [ 0.1423,  0.0770, -0.0593,  ..., -0.2886, -0.5204, -0.1379]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0625],
         [ 0.0002],
         [ 0.0122],
         [ 0.0309],
         [-0.0610],
         [ 0.0010],
         [ 0.0137],
         [ 0.1044],
         [-0.0363],
         [ 0.0410]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3594], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3594], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 0

para_sent_logits: tensor([0.0213], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0715,  0.0490,  0.0378, -0.1278, -0.0803, -0.3382, -0.0073, -0.0683,
         0.0353,  0.0213], device='cuda:0')
para_sents_offset: [0, 3, 8, 9, 13, 15, 21, 22, 25, 27, 28]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64), 9: tensor([], device='cuda:0', dtype=torch.int64)}
sp_para_pred: tensor([1, 2], device='cuda:0')
validation_step
qid:  5ae203d6554299234fd04394
para_indexes:  tensor([ 21, 102, 188, 238, 290, 352, 379, 464, 507, 564], device='cuda:0')
sp_pa

para_sent_logits: tensor([0.0375, 0.0286], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0238,  0.0661], device='cuda:0')
para_sents_offset: [0, 2, 4]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0746,  0.0180, -0.0539], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0238,  0.0661,  0.0387], device='cuda:0')
para_sents_offset: [0, 2, 4, 7]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0457, -0.0555], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0238,  0.0661,  0.0387, -0.0097], device='cuda:0')
para_sents_offset: [0, 2, 4, 7, 9]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 

sp_para_output:  tensor([[[-0.1356,  0.2763, -0.0604,  ..., -0.3440, -0.0783,  0.1480],
         [-0.0955, -0.1601,  0.1124,  ..., -0.3829, -0.0977,  0.1089],
         [ 0.1427, -0.1478,  0.0309,  ...,  0.0305, -0.3513, -0.2794],
         ...,
         [ 0.0452,  0.0801,  0.0128,  ..., -0.0669, -0.1996, -0.1618],
         [-0.1074, -0.0465,  0.0337,  ..., -0.2235, -0.2311, -0.0519],
         [-0.1037, -0.0461,  0.0928,  ..., -0.0102, -0.3213, -0.1233]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0170],
         [0.1310],
         [0.0352],
         [0.0796],
         [0.0597],
         [0.0215],
         [0.0399],
         [0.0215],
         [0.0888]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3772], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3772], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8]


validation_step
qid:  5a81418455429903bc27b95f
para_indexes:  tensor([ 18,  60, 122, 143, 275, 492, 510, 590, 656, 706], device='cuda:0')
sp_para_output:  tensor([[[-0.0619,  0.3277, -0.0106,  ..., -0.3571,  0.1021,  0.4612],
         [ 0.1138,  0.0472,  0.0909,  ..., -0.6431,  0.0629,  0.3405],
         [-0.1093,  0.2997, -0.1079,  ..., -0.4078,  0.1300,  0.1358],
         ...,
         [-0.1928, -0.0605, -0.1096,  ..., -0.4451, -0.3622, -0.1033],
         [-0.1385,  0.0347, -0.0262,  ..., -0.1531, -0.3122, -0.0916],
         [-0.0171,  0.0147, -0.0414,  ..., -0.5071, -0.2270,  0.2226]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0040],
         [ 0.0331],
         [ 0.0246],
         [ 0.0330],
         [ 0.0098],
         [-0.0310],
         [ 0.1213],
         [ 0.0823],
         [-0.0006],
         [ 0.0389]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3303], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5a7750de55429966f1a36cf0
para_indexes:  tensor([  18,  127,  236,  385,  435,  544,  745,  864,  954, 1077],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.1214,  0.3814,  0.0085,  ..., -0.3161,  0.0043,  0.2073],
         [ 0.0965, -0.1015,  0.1045,  ...,  0.0074,  0.0147,  0.0538],
         [-0.0670,  0.5582, -0.2290,  ..., -0.0056,  0.2326,  0.1104],
         ...,
         [-0.0800, -0.0862,  0.0133,  ..., -0.0623,  0.2366, -0.1459],
         [ 0.1998, -0.0167,  0.0698,  ...,  0.1573, -0.3050, -0.0688],
         [ 0.0317,  0.4788,  0.0581,  ..., -0.1973,  0.0969,  0.1817]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0016],
         [-0.0034],
         [ 0.0401],
         [-0.0873],
         [-0.0088],
         [ 0.0464],
         [-0.0535],
         [ 0.0177],
         [ 0.0001],
         [-0.0277]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2866], device='cuda:0', dtype=torch.flo

validation_step
qid:  5abd4c6d5542993062266c47
para_indexes:  tensor([ 17,  63, 113, 222, 303, 453, 537, 593], device='cuda:0')
sp_para_output:  tensor([[[ 0.2443, -0.1960,  0.2047,  ..., -0.5025,  0.0965, -0.0075],
         [ 0.1052, -0.1673, -0.0765,  ..., -0.4306, -0.0540, -0.0507],
         [ 0.1071, -0.2339,  0.0082,  ..., -0.2893, -0.0921, -0.1509],
         ...,
         [-0.2557, -0.2668,  0.2168,  ..., -0.2711,  0.2449,  0.4451],
         [ 0.0116, -0.2818,  0.0878,  ..., -0.6992,  0.1453,  0.0816],
         [-0.0171,  0.3018, -0.0563,  ..., -0.5658,  0.2738,  0.1054]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0597],
         [0.0952],
         [0.0574],
         [0.0438],
         [0.0550],
         [0.1186],
         [0.0615],
         [0.0678]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3616], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tens

validation_step
qid:  5adc8d4f554299473435373d
para_indexes:  tensor([ 20,  79, 120, 194, 309, 431, 507, 588, 672, 744], device='cuda:0')
sp_para_output:  tensor([[[ 1.8660e-01, -4.6064e-04, -6.4513e-03,  ..., -4.5945e-01,
          -5.4448e-02, -7.0116e-02],
         [-8.5978e-02,  1.7233e-01,  8.1102e-02,  ..., -3.7395e-01,
          -2.7390e-01,  1.3198e-01],
         [ 1.4143e-01,  1.7284e-01, -1.8592e-01,  ..., -6.9656e-01,
          -5.7485e-02, -2.2182e-01],
         ...,
         [ 3.3826e-02,  5.7844e-02,  2.9487e-01,  ..., -2.1713e-01,
          -4.6058e-01, -2.9023e-01],
         [ 1.4276e-01,  2.0153e-01,  1.3734e-01,  ..., -1.9490e-01,
          -7.6461e-03,  2.4133e-01],
         [ 1.1118e-01,  1.4621e-01, -1.6273e-01,  ..., -2.5749e-01,
          -1.2549e-01, -4.1789e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0331],
         [ 0.0690],
         [ 0.0451],
         [-0.0026],
         [ 0.0098],
         [ 0.0230],
         [ 0.0920],
         [ 0.0340],
   

sp_para_output:  tensor([[[-0.1426,  0.1817,  0.0922,  ..., -0.6585,  0.1808,  0.1984],
         [-0.0169, -0.0768,  0.0689,  ..., -0.2928, -0.3647,  0.1285],
         [ 0.1083,  0.0614,  0.0124,  ..., -0.7227, -0.2590,  0.2528],
         ...,
         [ 0.1140,  0.0750,  0.2586,  ..., -0.4030, -0.2186,  0.0883],
         [ 0.0568,  0.3022,  0.1604,  ..., -0.5417, -0.3344, -0.0198],
         [-0.1249,  0.1612,  0.1504,  ..., -0.4368, -0.2509,  0.0241]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0313],
         [0.0389],
         [0.0069],
         [0.0196],
         [0.0906],
         [0.0664],
         [0.0234],
         [0.0403]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3557], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3557], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 4, 4, 5, 6, 6, 7, 7]
p

para_sent_logits: tensor([0.0290, 0.0593], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0815, 0.0977, 0.2529, 0.2271, 0.1636, 0.0701, 0.0370, 0.0529, 0.0883],
       device='cuda:0')
para_sents_offset: [0, 3, 5, 7, 10, 12, 14, 16, 18, 20]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([0, 1], device='cuda:0'), 3: tensor([2], device='cuda:0'), 4: tensor([0], device='cuda:0'), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0823, 0.1165, 0.0403, 0.0247, 0.0147], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([0.0815, 0.0977, 0.2529, 0.2271, 0.1636, 0.0701, 0.0370, 0.0529, 0.0883,
        0.2785], device='cuda:0')
para_sents_offset: [0, 3, 5, 7, 10, 12, 14, 16, 18, 20, 25

sp_para_output:  tensor([[[ 0.0380,  0.1246,  0.0928,  ..., -0.5024, -0.3432,  0.0754],
         [-0.1639,  0.1095,  0.0577,  ..., -0.3143, -0.0863, -0.0651],
         [ 0.0185,  0.0526, -0.0268,  ..., -0.1232,  0.0011,  0.1949],
         ...,
         [ 0.3107,  0.0886, -0.0951,  ..., -0.8653, -0.2546, -0.2052],
         [ 0.0741,  0.3226, -0.1760,  ..., -0.7357,  0.0333,  0.2214],
         [-0.2707,  0.2277,  0.0483,  ..., -0.7052, -0.1942,  0.1104]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0453],
         [ 0.0064],
         [ 0.0594],
         [-0.0435],
         [-0.0317],
         [-0.0809],
         [ 0.0167],
         [-0.0365],
         [-0.0252],
         [-0.0561]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3203], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3203], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1

sp_para_output:  tensor([[[-0.0152, -0.0886,  0.0853,  ..., -0.0986,  0.0643,  0.0524],
         [ 0.0187, -0.0407,  0.1871,  ..., -0.1488,  0.0572,  0.0038],
         [-0.1212, -0.2490,  0.2910,  ..., -0.3237, -0.0450,  0.3622],
         ...,
         [ 0.1228,  0.1530,  0.2436,  ..., -0.5432,  0.0420,  0.2164],
         [ 0.0648, -0.0281,  0.3403,  ..., -0.1769,  0.1122,  0.0888],
         [ 0.0122, -0.0196,  0.0142,  ..., -0.3521, -0.2508,  0.0819]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0121],
         [0.1306],
         [0.0599],
         [0.0371],
         [0.0193],
         [0.0376],
         [0.0914],
         [0.1526],
         [0.1140],
         [0.0867]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3738], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3738], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1, 2, 2, 3,

sp_para_output:  tensor([[[ 0.3524,  0.1376,  0.1498,  ..., -0.4422, -0.2221, -0.1151],
         [ 0.2180,  0.0555,  0.0624,  ..., -0.7820, -0.1369, -0.0146],
         [-0.0385,  0.1039, -0.0037,  ..., -0.4986, -0.1196, -0.2066],
         ...,
         [-0.1107,  0.2033,  0.0166,  ..., -0.3430, -0.0841, -0.0331],
         [ 0.0905, -0.1267,  0.1563,  ...,  0.0997, -0.1900, -0.1702],
         [ 0.1329,  0.1137,  0.1108,  ..., -0.5926, -0.3627, -0.0403]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0421],
         [ 0.0623],
         [ 0.0928],
         [-0.0552],
         [ 0.0347],
         [-0.0033],
         [ 0.0520],
         [ 0.0720],
         [ 0.0508]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2712], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2712], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 2, 2, 2, 3, 3, 4, 5, 5, 

validation_step
qid:  5a74cfc75542996c70cfadfe
para_indexes:  tensor([  18,  171,  415,  510,  626,  752,  865,  994, 1062, 1118],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.3734, -0.4398,  0.1275,  ..., -0.4198, -0.0808,  0.0825],
         [-0.0047, -0.1206,  0.0734,  ..., -0.4814, -0.0363,  0.0986],
         [-0.1093, -0.0788,  0.0571,  ..., -0.4410, -0.0584,  0.2886],
         ...,
         [-0.1894, -0.0200,  0.0134,  ..., -0.0566,  0.0721,  0.1608],
         [ 0.0857, -0.6001,  0.0891,  ..., -0.0142,  0.1003, -0.0994],
         [-0.0130,  0.0675, -0.0214,  ..., -0.4266, -0.2056,  0.2174]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0152],
         [ 0.0167],
         [ 0.0745],
         [ 0.0673],
         [ 0.0533],
         [-0.1030],
         [ 0.0113],
         [ 0.1046],
         [ 0.0101],
         [ 0.0735]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3350], device='cuda:0', dtype=torch.flo

       device='cuda:0')
para_sents_offset: [0, 9, 12, 14, 22, 24, 31, 36, 39, 42]
evidence_candidates: {0: tensor([3, 4, 7], device='cuda:0'), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([4, 5], device='cuda:0'), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([1, 2], device='cuda:0'), 8: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0217,  0.0408], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.7711, 0.1714, 0.0582, 0.5605, 0.0960, 0.2975, 0.3494, 0.2393, 0.0505,
        0.0191], device='cuda:0')
para_sents_offset: [0, 9, 12, 14, 22, 24, 31, 36, 39, 42, 44]
evidence_candidates: {0: tensor([3, 4, 7], device='cuda:0'), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([4, 5], device='cuda:0'),

sp_para_output:  tensor([[[ 0.1895, -0.3138,  0.1770,  ..., -0.3172, -0.2523, -0.1189],
         [-0.0617,  0.0930,  0.0575,  ..., -0.7997, -0.0528,  0.2826],
         [-0.1080,  0.2308,  0.0450,  ..., -0.2129,  0.3070,  0.0473],
         ...,
         [-0.0580,  0.0827,  0.0968,  ..., -0.3053, -0.1282,  0.2300],
         [-0.1068,  0.1686,  0.0522,  ..., -0.7610,  0.2229, -0.0999],
         [ 0.1918, -0.2150,  0.0140,  ..., -0.5451, -0.1036,  0.1289]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0546],
         [ 0.0950],
         [-0.0034],
         [ 0.0753],
         [ 0.0959],
         [ 0.0917],
         [ 0.0507],
         [ 0.0056],
         [ 0.0293],
         [-0.0141]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3235], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3235], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1

para_sent_logits: tensor([-0.0444, -0.0793], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.1286, -0.0713, -0.1681,  0.0835, -0.0134, -0.0849, -0.1165,  0.0113,
        -0.1238], device='cuda:0')
para_sents_offset: [0, 2, 4, 8, 11, 13, 15, 18, 19, 21]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64)}
sp_para_pred: tensor([3, 7], device='cuda:0')
validation_step
qid:  5adc6f6255429947ff173961
para_indexes:  tensor([ 21,  69, 138, 225, 314, 403, 448, 495, 549, 584], device='cuda:0')
sp_para_output:  tensor([[[-0.0770,  0.0663, -0.2269,  ...,

sp_para_output:  tensor([[[ 0.2433,  0.2209, -0.0996,  ..., -0.3263, -0.2635, -0.0649],
         [ 0.2910,  0.0825, -0.0598,  ..., -0.2606, -0.1082, -0.2152],
         [-0.2242,  0.2815, -0.2185,  ..., -0.0633, -0.1843,  0.2029],
         ...,
         [ 0.1772,  0.0803, -0.0604,  ..., -0.2322, -0.2418, -0.0698],
         [ 0.1796,  0.2279,  0.0751,  ..., -0.2582, -0.2960, -0.1207],
         [ 0.1157,  0.2632,  0.0377,  ..., -0.4178, -0.1091,  0.0935]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0838],
         [0.0750],
         [0.0051],
         [0.0489],
         [0.1267],
         [0.0456],
         [0.1039],
         [0.0614],
         [0.0740],
         [0.1084]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3621], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3621], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 1, 1, 1, 2,

validation_step
qid:  5ae71f06554299572ea54708
para_indexes:  tensor([  33,  261,  368,  517,  585,  639,  861,  963, 1047, 1111],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.0908,  0.2282,  0.1576,  ..., -0.2216, -0.2893, -0.2097],
         [-0.1371,  0.0570,  0.0414,  ..., -0.2319, -0.3723,  0.0846],
         [-0.2693,  0.2915, -0.1017,  ..., -0.2499,  0.4162,  0.1856],
         ...,
         [-0.0796,  0.2229, -0.1421,  ..., -0.6111, -0.5513, -0.2028],
         [-0.0632,  0.0089,  0.0064,  ..., -0.1978,  0.1064,  0.0806],
         [ 0.0362,  0.0705,  0.1554,  ..., -0.6169, -0.2322,  0.2644]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0214],
         [-0.0742],
         [-0.0410],
         [ 0.0278],
         [ 0.1311],
         [ 0.0100],
         [ 0.0182],
         [ 0.0584],
         [-0.0331],
         [ 0.0665]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2900], device='cuda:0', dtype=torch.flo

validation_step
qid:  5a8bd4d65542997f31a41dd8
para_indexes:  tensor([  24,  140,  338,  545,  721,  867, 1089, 1136, 1230, 1380],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.2267, -0.2427, -0.1820,  ..., -0.4973, -0.2300,  0.0097],
         [ 0.1220, -0.0268, -0.0199,  ..., -0.5506, -0.1880, -0.1036],
         [-0.0949,  0.2444, -0.1344,  ..., -0.5560,  0.1899,  0.0714],
         ...,
         [ 0.1133, -0.1624, -0.0532,  ..., -0.5390, -0.0578,  0.1575],
         [ 0.1171, -0.0249,  0.2740,  ..., -0.2464, -0.5289,  0.3364],
         [-0.0948,  0.5105,  0.0820,  ..., -0.6491,  0.2081,  0.2648]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0754],
         [ 0.0600],
         [-0.0070],
         [ 0.1155],
         [ 0.0738],
         [ 0.0041],
         [ 0.0614],
         [ 0.0540],
         [ 0.0824],
         [ 0.0193]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3308], device='cuda:0', dtype=torch.flo

validation_step
qid:  5add1eed5542992ae4cec485
para_indexes:  tensor([ 15,  86, 110], device='cuda:0')
sp_para_output:  tensor([[[-0.0310, -0.2880,  0.3124,  ..., -0.2967,  0.2202,  0.0469],
         [-0.1807, -0.2891,  0.2967,  ..., -0.5145,  0.2379,  0.1322],
         [-0.0251, -0.3769,  0.0419,  ..., -0.5816,  0.3693,  0.1086]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0519],
         [ 0.0905],
         [-0.0131]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3359], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3359], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 2, 2]
para_sent_logits: tensor([ 0.0073, -0.0017, -0.0373], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0317], device='cuda:0')
para_sents_offset: [0, 3]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_l

para_sent_logits: tensor([-0.0046, -0.0034, -0.0161], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0242], device='cuda:0')
para_sents_offset: [0, 3]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0389], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0242,  0.0389], device='cuda:0')
para_sents_offset: [0, 3, 4]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0172,  0.0137], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0242,  0.0389, -0.0035], device='cuda:0')
para_sents_offset: [0, 3, 4, 6]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0432, 0.0415], device='cuda:0', dtype=torch.float16)
para_sent_logi

       device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0639, 0.4423, 0.1455, 0.2117, 0.1567, 0.1479, 0.1304, 0.7004],
       device='cuda:0')
para_sents_offset: [0, 1, 6, 9, 11, 14, 17, 18, 27]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([2], device='cuda:0'), 2: tensor([0], device='cuda:0'), 3: tensor([1], device='cuda:0'), 4: tensor([1], device='cuda:0'), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([0], device='cuda:0'), 7: tensor([1, 2, 6], device='cuda:0')}
sp_para_pred: tensor([7, 1], device='cuda:0')
validation_step
qid:  5ae28058554299495565da90
para_indexes:  tensor([ 18,  50,  84, 157, 236, 363, 458, 510, 621, 707], device='cuda:0')
sp_para_output:  tensor([[[ 0.1677, -0.1454,  0.3320,  ..., -0.4036,  0.1078,  0.4145],
         [-0.0829, -0.1178,  0.1634,  ..., -0.4278,  0.0453, -0.0360],
         [ 0.0129,  0.0084,  0.1037,  ..., -0.4270,  0.1904,  0.1514],
         ...,
         [ 0.0144, -0.1717, 

validation_step
qid:  5a8e00515542995a26add454
para_indexes:  tensor([  19,  122,  242,  284,  396,  564,  668,  773,  836, 1001],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.3592,  0.0296,  0.1728,  ..., -0.5046, -0.2877,  0.0946],
         [ 0.1903, -0.0032, -0.0896,  ..., -0.4307, -0.3141, -0.3135],
         [ 0.0317,  0.1150, -0.0210,  ..., -0.7886, -0.2389,  0.0755],
         ...,
         [ 0.2711, -0.1970, -0.0961,  ..., -0.3766, -0.2455,  0.1873],
         [ 0.1501,  0.0622,  0.1918,  ..., -0.4695, -0.1088,  0.0467],
         [ 0.0608, -0.0569, -0.1861,  ..., -0.5937,  0.0842, -0.0051]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0587],
         [-0.0079],
         [-0.0616],
         [-0.0784],
         [-0.0714],
         [ 0.0040],
         [-0.0288],
         [-0.0326],
         [-0.0213],
         [ 0.0364]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.4272], device='cuda:0', dtype=torch.flo

para_sent_logits_sum: tensor([ 0.1349,  0.1617, -0.0792,  0.0407,  0.1489], device='cuda:0')
para_sents_offset: [0, 3, 7, 10, 12, 14]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 1.3318e-01,  2.6840e-02,  1.3725e-02,  9.4528e-03,  5.5237e-02,
         2.0523e-02, -6.9916e-05], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.1349,  0.1617, -0.0792,  0.0407,  0.1489,  0.2589], device='cuda:0')
para_sents_offset: [0, 3, 7, 10, 12, 14, 21]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5

sp_para_output:  tensor([[[ 0.0139, -0.0208,  0.1955,  ..., -0.0533, -0.3006, -0.1371],
         [-0.0044, -0.0334,  0.1103,  ..., -0.2629, -0.0215, -0.0357],
         [-0.2234,  0.2436,  0.1212,  ..., -0.2914, -0.0530,  0.1065],
         ...,
         [-0.0161,  0.0350,  0.1076,  ..., -0.0264, -0.2187,  0.0120],
         [ 0.1006,  0.3389,  0.0979,  ..., -0.8262, -0.2440, -0.3270],
         [-0.0716,  0.1551,  0.1490,  ..., -0.4862,  0.0200, -0.0336]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0018],
         [-0.0249],
         [-0.0131],
         [-0.0615],
         [-0.0181],
         [ 0.0223],
         [ 0.0078],
         [-0.0504],
         [-0.0427],
         [ 0.0008]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3406], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3406], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 2

       device='cuda:0')
para_sents_offset: [0, 2, 3, 4, 7, 12, 16, 18, 20]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0080], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0447,  0.0867, -0.0224,  0.0249,  0.1091,  0.0431,  0.0024, -0.0118,
        -0.0080], device='cuda:0')
para_sents_offset: [0, 2, 3, 4, 7, 12, 16, 18, 20, 21]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: 

validation_step
qid:  5ae791a55542994a481bbdaa
para_indexes:  tensor([ 28, 120, 205, 303, 408, 479, 541, 639, 714, 791], device='cuda:0')
sp_para_output:  tensor([[[ 0.1868,  0.2099,  0.4125,  ..., -0.2601, -0.3641, -0.1953],
         [ 0.0538, -0.1087, -0.1308,  ..., -0.1654,  0.0011, -0.0343],
         [ 0.2690, -0.1265, -0.0590,  ..., -0.3344, -0.3282, -0.0886],
         ...,
         [ 0.3304,  0.0877,  0.3042,  ..., -0.3342, -0.4937, -0.2775],
         [ 0.0689,  0.0931,  0.0975,  ..., -0.0054, -0.4526, -0.1022],
         [-0.1014,  0.0613, -0.0023,  ..., -0.2792,  0.0391,  0.2853]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0327],
         [ 0.0364],
         [ 0.0128],
         [-0.0241],
         [-0.0524],
         [ 0.0970],
         [ 0.0166],
         [-0.0582],
         [-0.0321],
         [-0.0327]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2345], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 0.3791, -0.1818,  0.3125,  ...,  0.0180, -0.2547,  0.3770],
         [ 0.3050,  0.0222,  0.2232,  ..., -0.0933, -0.0934,  0.2715],
         [ 0.2349, -0.0040,  0.1045,  ..., -0.3654, -0.0626,  0.1607],
         ...,
         [ 0.1075, -0.0605,  0.0555,  ..., -0.2875, -0.1332,  0.1670],
         [ 0.0162,  0.0588, -0.1248,  ..., -0.0029, -0.1279,  0.0205],
         [ 0.1668,  0.3949,  0.1901,  ..., -0.1777, -0.0853,  0.3503]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0508],
         [ 0.1174],
         [ 0.0327],
         [ 0.0134],
         [ 0.0157],
         [ 0.1038],
         [ 0.0833],
         [ 0.0254],
         [-0.0139],
         [ 0.0125]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2705], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2705], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 1

validation_step
qid:  5a8637415542994775f60724
para_indexes:  tensor([  19,  252,  346,  488,  540,  644,  741,  794,  908, 1040],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.0297,  0.4349, -0.1923,  ..., -0.1263, -0.0831,  0.0250],
         [ 0.0392,  0.2280, -0.0609,  ...,  0.2178, -0.3450, -0.0092],
         [ 0.0362,  0.2168,  0.1418,  ..., -0.1972, -0.1239,  0.1546],
         ...,
         [ 0.0582,  0.3388,  0.0588,  ..., -0.2764, -0.5478, -0.2704],
         [ 0.0424,  0.1717,  0.1528,  ..., -0.3193, -0.4568,  0.0825],
         [ 0.2771,  0.1608,  0.2308,  ...,  0.2338, -0.1386,  0.0877]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0052],
         [-0.0941],
         [-0.0242],
         [ 0.1036],
         [-0.0736],
         [-0.0170],
         [-0.0472],
         [-0.0842],
         [-0.0365],
         [-0.0748]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2896], device='cuda:0', dtype=torch.flo

sp_para_output:  tensor([[[-0.3137,  0.1593, -0.1414,  ..., -0.1724,  0.1302,  0.1666],
         [-0.1012, -0.0834, -0.0823,  ..., -0.2162, -0.0137, -0.0200],
         [-0.1917, -0.0895, -0.0501,  ..., -0.0853,  0.0530,  0.0430],
         ...,
         [-0.2790,  0.0544, -0.0691,  ..., -0.1896, -0.0334, -0.0615],
         [-0.0764,  0.0078,  0.0057,  ..., -0.0496, -0.1952,  0.0971],
         [ 0.0040,  0.0763, -0.0978,  ..., -0.0631, -0.2276,  0.0440]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0416],
         [ 0.0636],
         [-0.0098],
         [ 0.0466],
         [ 0.0024],
         [ 0.1026],
         [-0.0306],
         [ 0.0352],
         [-0.0141],
         [-0.0188]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.4915], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.4915], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 2, 2

sp_para_output:  tensor([[[ 0.0622, -0.2667,  0.2070,  ..., -0.0498,  0.2400, -0.0948],
         [-0.2603, -0.1520,  0.0603,  ...,  0.1894, -0.0187, -0.1505],
         [ 0.0281, -0.0855,  0.0493,  ..., -0.1194,  0.1011, -0.0058],
         ...,
         [-0.2504, -0.0081,  0.1004,  ..., -0.1363,  0.0136, -0.3376],
         [ 0.0293, -0.0735,  0.0080,  ...,  0.1102,  0.1155, -0.1945],
         [ 0.1102, -0.1834,  0.0601,  ...,  0.1621,  0.0628, -0.2829]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0209],
         [ 0.1005],
         [ 0.0880],
         [ 0.0572],
         [ 0.0239],
         [ 0.0204],
         [ 0.0045],
         [ 0.0090],
         [-0.0079],
         [-0.0543]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.4087], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.4087], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 2

sp_para_output:  tensor([[[-0.0119, -0.2181,  0.0589,  ..., -0.3925, -0.0025,  0.1352],
         [-0.1413, -0.2130, -0.0161,  ..., -0.6686, -0.0475,  0.1192],
         [-0.3162, -0.1394,  0.3616,  ...,  0.1991, -0.4197,  0.4338],
         ...,
         [ 0.0273, -0.3012,  0.0659,  ..., -0.1465, -0.2959, -0.0686],
         [ 0.0831, -0.1282, -0.1058,  ..., -0.3426, -0.2728, -0.2418],
         [ 0.1835, -0.2392,  0.0872,  ..., -0.3700, -0.1535,  0.0621]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0220],
         [ 0.0686],
         [ 0.0155],
         [-0.0394],
         [-0.0517],
         [ 0.0711],
         [-0.0349],
         [ 0.0531],
         [ 0.0047],
         [-0.0907]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3027], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3027], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 1, 1

validation_step
qid:  5ac4c0fc5542997ea680cab1
para_indexes:  tensor([ 24, 105, 214, 300, 350, 420, 448, 482], device='cuda:0')
sp_para_output:  tensor([[[-0.0332,  0.0398,  0.1106,  ..., -0.0032,  0.1295,  0.0744],
         [-0.2042, -0.2565,  0.1797,  ...,  0.0582, -0.0920, -0.0535],
         [-0.3095, -0.1893,  0.0515,  ..., -0.1136, -0.1051, -0.0638],
         ...,
         [-0.1441, -0.2486,  0.0807,  ..., -0.4470, -0.2444, -0.0444],
         [-0.1891, -0.3461,  0.1895,  ..., -0.2034,  0.0327,  0.1403],
         [-0.2413, -0.2457,  0.0816,  ..., -0.3792, -0.1192, -0.0872]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0339],
         [ 0.0895],
         [ 0.0297],
         [ 0.0119],
         [-0.0022],
         [ 0.0674],
         [ 0.0751],
         [ 0.0263]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3467], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_scor

sp_para_output:  tensor([[[ 0.1415, -0.0220,  0.2936,  ..., -0.4824, -0.0702,  0.1407],
         [-0.0255,  0.0418,  0.1176,  ..., -0.4720, -0.1289,  0.0720],
         [-0.3319,  0.3271,  0.2187,  ..., -0.2898, -0.3477, -0.1103],
         ...,
         [-0.1022, -0.0094,  0.1538,  ..., -0.6366, -0.0483, -0.0266],
         [ 0.1339,  0.3629, -0.1198,  ..., -0.2723, -0.3715,  0.2261],
         [-0.0478,  0.1437,  0.0134,  ..., -0.0447,  0.0401,  0.0954]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0408],
         [ 0.0285],
         [ 0.0222],
         [-0.0594],
         [-0.0399],
         [-0.0316],
         [ 0.0108],
         [-0.0188]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3494], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3494], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 5,

validation_step
qid:  5a74b66a55429929fddd84cb
para_indexes:  tensor([ 15,  93, 178, 247, 298, 387, 468, 501, 583, 656], device='cuda:0')
sp_para_output:  tensor([[[-0.0873,  0.2128, -0.1218,  ..., -0.1706,  0.0354,  0.0810],
         [ 0.0908,  0.0107,  0.0762,  ..., -0.5412, -0.1055,  0.0604],
         [ 0.0739,  0.0421, -0.0251,  ..., -0.5604, -0.2633,  0.1323],
         ...,
         [-0.0803, -0.1784, -0.1276,  ..., -0.2007, -0.2498, -0.1305],
         [ 0.0349,  0.1306, -0.0008,  ..., -0.6471, -0.2060, -0.0160],
         [ 0.1016,  0.1015, -0.0352,  ..., -0.7507, -0.2981, -0.0530]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0560],
         [ 0.0699],
         [-0.0291],
         [-0.0488],
         [-0.0645],
         [-0.0888],
         [ 0.0492],
         [ 0.0240],
         [ 0.0453],
         [ 0.0075]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3970], device='cuda:0', dtype=torch.float16), 'start_log

para_indexes:  tensor([ 19,  79, 173, 322, 432, 487, 575, 696, 798, 886], device='cuda:0')
sp_para_output:  tensor([[[ 0.0821, -0.1032,  0.1670,  ..., -0.3123, -0.1822, -0.0972],
         [ 0.0296,  0.3053,  0.0429,  ..., -0.4152, -0.0814, -0.2179],
         [-0.0173,  0.1423,  0.0857,  ..., -0.4492, -0.4116, -0.1611],
         ...,
         [-0.1707, -0.0118, -0.0466,  ..., -0.3140, -0.4365,  0.1279],
         [-0.0553,  0.1709,  0.0228,  ..., -0.5400,  0.0786, -0.0448],
         [-0.2361,  0.3051, -0.1654,  ..., -0.4397,  0.2943,  0.1268]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0281],
         [ 0.0255],
         [-0.0385],
         [ 0.0105],
         [-0.0347],
         [ 0.0276],
         [ 0.0135],
         [-0.0951],
         [-0.0645],
         [-0.0129]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3391], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_s

para_sent_logits: tensor([-0.0541], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0362,  0.0479, -0.0480,  0.0041, -0.0175, -0.0398, -0.0541],
       device='cuda:0')
para_sents_offset: [0, 1, 5, 6, 7, 8, 9, 10]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0705], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0362,  0.0479, -0.0480,  0.0041, -0.0175, -0.0398, -0.0541, -0.0705],
       device='cuda:0')
para_sents_offset: [0, 1, 5, 6, 7, 8, 9, 10, 11]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor(

validation_step
qid:  5a7e1b7155429965cec5ea6c
para_indexes:  tensor([ 26,  94, 197, 269, 342, 526, 601, 682, 772, 849], device='cuda:0')
sp_para_output:  tensor([[[ 0.2819,  0.1450,  0.1920,  ...,  0.0166, -0.3514, -0.1867],
         [ 0.0205,  0.1112,  0.0888,  ..., -0.0369, -0.0489,  0.0316],
         [ 0.0581,  0.0703,  0.1656,  ..., -0.1607, -0.3462, -0.0990],
         ...,
         [ 0.2889,  0.2504,  0.0376,  ..., -0.1771, -0.3551,  0.0030],
         [-0.1986,  0.2621, -0.0937,  ..., -0.0331, -0.0516, -0.0528],
         [-0.0383,  0.3514,  0.1484,  ..., -0.3126, -0.0778, -0.1167]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0435],
         [ 0.1246],
         [ 0.0429],
         [-0.0657],
         [ 0.0140],
         [-0.0097],
         [ 0.0674],
         [-0.0095],
         [-0.0364],
         [ 0.0249]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3721], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5a7fc5875542992e7d278d67
para_indexes:  tensor([ 23,  80, 153, 191, 225, 288, 520, 580, 662, 734], device='cuda:0')
sp_para_output:  tensor([[[ 0.0450, -0.0054, -0.0047,  ..., -0.3343,  0.0466,  0.2361],
         [ 0.3112,  0.2549,  0.2058,  ..., -0.7889, -0.1847,  0.2016],
         [ 0.2360, -0.1898, -0.0782,  ..., -0.7739, -0.4229,  0.2420],
         ...,
         [ 0.3107, -0.0996,  0.1375,  ..., -0.5547, -0.2058,  0.1475],
         [ 0.1303, -0.1194,  0.2160,  ..., -0.2616, -0.2979,  0.1783],
         [-0.0938, -0.0657,  0.3995,  ...,  0.0269,  0.2270,  0.0483]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0884],
         [ 0.0847],
         [ 0.0192],
         [ 0.1490],
         [ 0.0447],
         [-0.0206],
         [ 0.0467],
         [ 0.0372],
         [ 0.0085],
         [ 0.0235]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3479], device='cuda:0', dtype=torch.float16), 'start_log

       device='cuda:0')
para_sents_offset: [0, 6, 10, 13, 16, 20, 23, 26]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([1], device='cuda:0'), 6: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0406, -0.0906], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0061,  0.0462, -0.2223,  0.0680, -0.0094,  0.1730,  0.0346, -0.0500],
       device='cuda:0')
para_sents_offset: [0, 6, 10, 13, 16, 20, 23, 26, 28]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([1], device='c

sp_para_output:  tensor([[[ 0.4295,  0.1896,  0.0316,  ..., -0.6844, -0.2683,  0.0547],
         [ 0.1780,  0.0247, -0.0231,  ..., -0.6518, -0.3147, -0.1213],
         [ 0.2760,  0.0895, -0.0396,  ..., -0.5743, -0.4218, -0.0506],
         ...,
         [-0.0205,  0.1370,  0.1249,  ..., -0.0142, -0.1274, -0.0140],
         [ 0.0293,  0.1257, -0.0921,  ..., -0.4902, -0.1704, -0.2912],
         [-0.0058,  0.1544,  0.0652,  ..., -0.4313, -0.3064,  0.1519]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0359],
         [ 0.0410],
         [-0.0505],
         [-0.0105],
         [ 0.0390],
         [ 0.1112],
         [ 0.0620],
         [ 0.0807],
         [ 0.0316],
         [ 0.0350]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2600], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2600], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 2, 2

validation_step
qid:  5a89ddf25542992e4fca83ff
para_indexes:  tensor([ 17, 112, 169, 221, 263, 312, 398, 450, 492, 548], device='cuda:0')
sp_para_output:  tensor([[[-0.0889,  0.4415, -0.0253,  ..., -0.2464, -0.0091,  0.0690],
         [ 0.0470, -0.2559, -0.1489,  ..., -0.0072, -0.0313, -0.0200],
         [-0.4499,  0.0409, -0.3904,  ..., -0.6489, -0.2259, -0.1348],
         ...,
         [-0.2241, -0.3775, -0.2072,  ...,  0.3133, -0.3382, -0.2286],
         [ 0.2042, -0.1195, -0.1333,  ..., -0.2772, -0.4104, -0.2569],
         [-0.0560, -0.0292,  0.0311,  ..., -0.4465, -0.0554, -0.0298]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0261],
         [ 0.0263],
         [ 0.0200],
         [ 0.0253],
         [-0.0495],
         [ 0.0236],
         [ 0.0372],
         [ 0.0742],
         [ 0.0740],
         [ 0.0217]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3538], device='cuda:0', dtype=torch.float16), 'start_log

para_indexes:  tensor([  25,   92,  286,  341,  432,  526,  749,  848,  937, 1019],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.0860, -0.2716,  0.1982,  ..., -0.0213, -0.1728,  0.2092],
         [ 0.1585, -0.1544,  0.2556,  ..., -0.0408, -0.4169, -0.2598],
         [ 0.2487,  0.1712,  0.1729,  ..., -0.5673, -0.2970, -0.3878],
         ...,
         [-0.0714,  0.0302,  0.1498,  ..., -0.2877, -0.0058,  0.0858],
         [ 0.1312, -0.0166,  0.2953,  ..., -0.4352, -0.0728, -0.2913],
         [ 0.1256,  0.1381,  0.1095,  ..., -0.5229, -0.0851, -0.2100]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0493],
         [0.0825],
         [0.0128],
         [0.0207],
         [0.0290],
         [0.0894],
         [0.0409],
         [0.0056],
         [0.0409],
         [0.0996]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3042], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p

validation_step
qid:  5a7a70b355429941d65f2647
para_indexes:  tensor([ 17,  93, 184, 260, 362, 443, 554, 617, 647, 744], device='cuda:0')
sp_para_output:  tensor([[[ 0.0870,  0.3028, -0.2453,  ..., -0.5830, -0.0540, -0.0310],
         [-0.0568,  0.0579,  0.0298,  ..., -0.4373, -0.0201,  0.0202],
         [ 0.0172, -0.2680, -0.1854,  ..., -0.9071, -0.1224, -0.2715],
         ...,
         [-0.0224,  0.0503, -0.0034,  ..., -0.7546, -0.2040, -0.1389],
         [ 0.0471, -0.0493, -0.1588,  ..., -0.6649, -0.4325, -0.2757],
         [-0.1370,  0.0036, -0.0267,  ..., -0.4810, -0.1648, -0.1019]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0521],
         [ 0.0602],
         [-0.0072],
         [-0.0483],
         [-0.0587],
         [ 0.0320],
         [-0.0237],
         [ 0.0334],
         [ 0.0024],
         [-0.0241]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2979], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5ae1f2d4554299234fd04358
para_indexes:  tensor([ 21, 127, 174, 264, 298, 356, 459, 508, 578, 655], device='cuda:0')
sp_para_output:  tensor([[[ 0.3031,  0.0254,  0.0954,  ..., -0.7204,  0.0990,  0.0813],
         [ 0.1573, -0.1845, -0.1127,  ..., -0.7555, -0.0126,  0.0302],
         [-0.1280, -0.0009,  0.1155,  ..., -0.4663, -0.1285, -0.0022],
         ...,
         [ 0.1662, -0.0190,  0.0325,  ..., -0.1070, -0.2712,  0.1044],
         [-0.0557, -0.1149,  0.0772,  ..., -0.6372, -0.2289,  0.0754],
         [-0.0014,  0.0072,  0.0790,  ..., -0.2465, -0.2927,  0.0958]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0166],
         [-0.0076],
         [-0.0040],
         [-0.0684],
         [-0.0900],
         [-0.0221],
         [ 0.0654],
         [ 0.0298],
         [ 0.0408],
         [-0.0854]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3000], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5abb07e95542992ccd8e7ec4
para_indexes:  tensor([ 16, 133, 238, 273, 406, 433, 497, 690, 803, 861], device='cuda:0')
sp_para_output:  tensor([[[ 0.1605, -0.0036,  0.1182,  ..., -0.4978, -0.0799,  0.2117],
         [ 0.1542,  0.0503,  0.0507,  ..., -0.7689, -0.1262,  0.2037],
         [ 0.0365,  0.2379, -0.0307,  ..., -0.8037, -0.2263,  0.1006],
         ...,
         [-0.1735,  0.0214, -0.1645,  ..., -0.6252,  0.0015,  0.2646],
         [ 0.1008,  0.2770,  0.0327,  ..., -0.4057, -0.1636,  0.1051],
         [-0.1751,  0.2541,  0.1594,  ..., -0.4715,  0.0280,  0.0696]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0074],
         [-0.0134],
         [ 0.0260],
         [-0.0497],
         [ 0.0040],
         [ 0.0679],
         [ 0.0699],
         [-0.0035],
         [-0.0630],
         [ 0.0209]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3296], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 0.3111, -0.0522, -0.1084,  ..., -0.5607, -0.2810,  0.0871],
         [ 0.0783, -0.0591, -0.1373,  ..., -0.3256, -0.2236, -0.1858],
         [ 0.0594,  0.0191,  0.0811,  ..., -0.6125, -0.0414,  0.0706],
         ...,
         [-0.0483,  0.1789, -0.0923,  ..., -0.3712, -0.0463,  0.0180],
         [ 0.1077, -0.0339,  0.1677,  ..., -0.6839, -0.1992, -0.1330],
         [ 0.1405, -0.2216, -0.0016,  ..., -0.1990,  0.0862, -0.0415]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0807],
         [ 0.0264],
         [ 0.0844],
         [ 0.0536],
         [ 0.1321],
         [ 0.0530],
         [ 0.0447],
         [-0.0077],
         [ 0.0944],
         [ 0.1182]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3181], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3181], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 1

evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0916,  0.0271, -0.0001], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0437, -0.0220,  0.0061,  0.0006,  0.0351,  0.0267, -0.0257,  0.1185],
       device='cuda:0')
para_sents_offset: [0, 1, 3, 4, 5, 7, 8, 10, 13]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cud

validation_step
qid:  5ac3e5de554299204fd21ebc
para_indexes:  tensor([ 20,  97, 175, 199, 312, 379, 482, 529, 612], device='cuda:0')
sp_para_output:  tensor([[[ 0.0663,  0.1695,  0.1389,  ..., -0.4125, -0.0316,  0.2420],
         [-0.0926, -0.0925, -0.0261,  ..., -0.5106, -0.0450,  0.1771],
         [-0.2016,  0.4378, -0.0757,  ..., -0.7107, -0.3814, -0.4643],
         ...,
         [-0.1810,  0.0795, -0.2778,  ..., -0.3987, -0.0784, -0.2458],
         [ 0.0238,  0.2059,  0.1929,  ..., -0.5479, -0.1350,  0.0942],
         [-0.1436,  0.0504,  0.0330,  ..., -0.3922,  0.1218,  0.2608]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0739],
         [ 0.1130],
         [ 0.0510],
         [ 0.0822],
         [-0.0210],
         [ 0.0010],
         [ 0.0870],
         [ 0.1327],
         [ 0.1321]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3105], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit

validation_step
qid:  5a84b6ed5542994c784dda18
para_indexes:  tensor([ 20,  50, 132, 156, 216, 275, 423, 445, 535, 597], device='cuda:0')
sp_para_output:  tensor([[[ 0.2031, -0.1308,  0.2645,  ..., -0.3083, -0.1417,  0.3771],
         [ 0.0692, -0.1903,  0.2750,  ..., -0.2748,  0.2556,  0.2194],
         [ 0.0572,  0.0576, -0.2349,  ..., -0.3618,  0.1606,  0.3086],
         ...,
         [ 0.0047, -0.0775,  0.0702,  ..., -0.0936, -0.0773,  0.1704],
         [ 0.0368, -0.0406, -0.1109,  ..., -0.2885, -0.1804, -0.2127],
         [-0.0210, -0.0107, -0.0368,  ..., -0.0134,  0.4177,  0.0135]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0464],
         [ 0.0386],
         [ 0.0248],
         [-0.0303],
         [ 0.0489],
         [-0.0043],
         [ 0.0473],
         [ 0.0410],
         [ 0.0865],
         [ 0.0723]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3120], device='cuda:0', dtype=torch.float16), 'start_log

para_sent_logits_sum: tensor([0.1411], device='cuda:0')
para_sents_offset: [0, 2]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0017,  0.0956], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1411, 0.0939], device='cuda:0')
para_sents_offset: [0, 2, 4]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0306, 0.0303, 0.1005, 0.0675], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1411, 0.0939, 0.2289], device='cuda:0')
para_sents_offset: [0, 2, 4, 8]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([2], device='cuda:0')}
para_sent_logits: tensor([0.1152, 0.0881], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1411, 0.0939, 0.2289, 0.2032], device='cuda:0')
para_sents_offset: [0, 2, 

para_indexes:  tensor([ 36, 116, 161, 204, 296, 360, 423, 481, 515, 568], device='cuda:0')
sp_para_output:  tensor([[[ 0.0739,  0.1580, -0.1408,  ..., -0.4835, -0.0716,  0.1848],
         [-0.1386, -0.0414, -0.0013,  ..., -0.6122, -0.0921,  0.2750],
         [-0.0133,  0.0029, -0.1676,  ..., -0.5214, -0.0221,  0.4061],
         ...,
         [ 0.0546, -0.0625,  0.0483,  ..., -0.2566, -0.0206,  0.1022],
         [ 0.3058, -0.2014,  0.1070,  ..., -0.1548, -0.0763,  0.1141],
         [-0.0135,  0.3835, -0.0178,  ..., -0.7504, -0.2112,  0.2826]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0238],
         [0.1043],
         [0.1039],
         [0.0354],
         [0.0138],
         [0.0131],
         [0.0136],
         [0.0784],
         [0.0266],
         [0.0629]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2639], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': ten

evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0434, 0.0764], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0790, 0.0432, 0.1309, 0.0607, 0.0641, 0.0190, 0.0000, 0.1198],
       device='cuda:0')
para_sents_offset: [0, 1, 2, 4, 5, 6, 8, 8, 10]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.in

decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2788], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2788], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9]
para_sent_logits: tensor([ 0.0575,  0.0257,  0.0415,  0.0900, -0.0006,  0.0518], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([0.2659], device='cuda:0')
para_sents_offset: [0, 6]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0042,  0.0576,  0.0410,  0.0101,  0.0135,  0.0316, -0.0285],
       device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.2659, 0.1210], device='cuda:0')
para_sents_offset: [0, 6, 13]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', 

         0.0864,  0.0406], device='cuda:0')
para_sents_offset: [0, 5, 6, 7, 13, 14, 16, 17, 19, 21, 22]
evidence_candidates: {0: tensor([2, 3], device='cuda:0'), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64), 9: tensor([], device='cuda:0', dtype=torch.int64)}
sp_para_pred: tensor([0, 8], device='cuda:0')
validation_step
qid:  5a908aaa55429933b8a20545
para_indexes:  tensor([ 20,  91, 173, 223, 244, 329, 420, 476, 529, 603], device='cuda:0')
sp_para_output:  tensor([[[ 0.1185,  0.2334,  0.3282,  ..., -0.4708, -0.0521,  0.1587],
         [-0.1881, -0.0395,  0.2058,  ..., -0.1079, -0.0037,  0.0518],
         [-0.2311,  0.0577,  0.2015,

sp_para_output:  tensor([[[ 0.2063,  0.0074,  0.1547,  ..., -0.9058, -0.2488,  0.2373],
         [ 0.0473, -0.0670, -0.0114,  ..., -0.6428, -0.0380,  0.0876],
         [ 0.0614,  0.0346, -0.0409,  ..., -0.5919, -0.3638, -0.0798],
         ...,
         [ 0.0154, -0.0894,  0.0325,  ..., -0.1313, -0.2909,  0.0034],
         [ 0.0245,  0.1613, -0.1883,  ..., -0.9820, -0.2126, -0.1890],
         [ 0.0204, -0.1115, -0.0520,  ..., -0.4745, -0.1072, -0.1266]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0113],
         [ 0.0977],
         [-0.0054],
         [-0.0525],
         [ 0.0200],
         [ 0.0335],
         [ 0.0451],
         [ 0.0106],
         [ 0.0596],
         [-0.0187]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3147], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3147], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1

validation_step
qid:  5a903b7f55429933b8a204af
para_indexes:  tensor([ 18,  95, 157, 186, 277, 326, 386, 444, 507], device='cuda:0')
sp_para_output:  tensor([[[-0.1161,  0.0348,  0.0182,  ..., -0.3543,  0.1820, -0.0197],
         [-0.0232,  0.0647,  0.0599,  ..., -0.2744, -0.0979, -0.0910],
         [-0.2525,  0.1457,  0.0511,  ..., -0.5294, -0.1717, -0.2479],
         ...,
         [-0.0725,  0.0033, -0.0415,  ..., -0.2727, -0.1851, -0.5822],
         [-0.2977,  0.0287,  0.0394,  ..., -0.1700, -0.2285, -0.1374],
         [-0.0945, -0.0060, -0.1486,  ..., -0.1397, -0.2069, -0.2944]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0717],
         [ 0.1041],
         [-0.0157],
         [-0.0316],
         [-0.0784],
         [ 0.0078],
         [-0.0873],
         [ 0.0422],
         [ 0.0416]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3362], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit

s_to_p_map:  [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 9, 9]
para_sent_logits: tensor([ 0.0320, -0.0078,  0.0703, -0.0037], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([0.0907], device='cuda:0')
para_sents_offset: [0, 4]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0957, -0.0263,  0.0093], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0907, 0.0787], device='cuda:0')
para_sents_offset: [0, 4, 7]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0335, -0.1036, -0.0616], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0907,  0.0787, -0.1317], device='cuda:0')
para_sents_offset: [0, 4, 7, 10]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64

sp_para_output:  tensor([[[ 2.6618e-01, -2.5506e-01,  1.7805e-01,  ..., -4.8472e-01,
          -2.5650e-01,  8.9777e-02],
         [-7.4275e-02,  7.8070e-02, -3.2881e-01,  ..., -5.8698e-01,
           1.1868e-02,  2.2771e-01],
         [ 3.9255e-02,  6.6255e-02,  1.2455e-02,  ..., -8.1784e-01,
          -3.2862e-01, -4.8445e-02],
         ...,
         [ 2.6517e-01, -1.5141e-01,  1.9756e-04,  ..., -5.3118e-01,
          -4.9240e-01, -2.3498e-01],
         [ 2.1503e-01,  3.7971e-02, -1.7609e-02,  ..., -8.5441e-01,
          -1.6747e-01,  5.2861e-02],
         [-1.7077e-01, -1.1795e-01,  1.2303e-01,  ..., -5.6199e-01,
          -2.5000e-01,  9.9958e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0492],
         [ 0.0482],
         [-0.0590],
         [ 0.0288],
         [ 0.1238],
         [ 0.0449],
         [ 0.0135],
         [ 0.0537],
         [ 0.1046],
         [ 0.0257]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor(

       device='cuda:0')
para_sents_offset: [0, 1, 4, 5, 6, 10, 14, 15, 17]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0193, -0.0457,  0.0424,  0.0145], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0420, -0.0245,  0.0266, -0.0076,  0.0264,  0.0693,  0.0310, -0.0788,
        -0.0082], device='cuda:0')
para_sents_offset: [0, 1, 4, 5, 6, 10, 14, 15, 17, 21]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device

sp_para_output:  tensor([[[ 0.1797, -0.0339,  0.2362,  ..., -0.1268,  0.0245,  0.0410],
         [-0.2839,  0.1207,  0.1043,  ..., -0.0833, -0.0908, -0.2270],
         [-0.1477, -0.1267,  0.3620,  ..., -0.2047,  0.1623,  0.0026],
         ...,
         [-0.1667, -0.2392,  0.2248,  ..., -0.0593,  0.0204, -0.1007],
         [-0.0257, -0.0316,  0.2071,  ..., -0.0929,  0.3640,  0.1889],
         [-0.1035,  0.0533,  0.1184,  ..., -0.2723,  0.0461, -0.1107]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0120],
         [ 0.0897],
         [ 0.0948],
         [ 0.0620],
         [ 0.0687],
         [ 0.0928],
         [ 0.0879],
         [ 0.1208],
         [ 0.0453],
         [ 0.0988]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3975], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3975], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 1

validation_step
qid:  5a8b98665542995d1e6f1413
para_indexes:  tensor([ 23, 183, 223, 274, 311, 370, 452, 532, 610], device='cuda:0')
sp_para_output:  tensor([[[ 6.7439e-02, -1.6259e-01,  3.4696e-01,  ...,  2.3465e-01,
          -2.2733e-01, -1.1781e-01],
         [-3.9126e-01,  1.2068e-01,  1.7128e-01,  ...,  1.6942e-01,
          -1.3847e-01, -4.9752e-02],
         [-3.0502e-01,  1.1049e-01,  7.7907e-02,  ..., -3.1018e-01,
          -3.8210e-04,  2.5005e-01],
         ...,
         [-3.4395e-01,  6.7585e-02,  9.6058e-02,  ...,  3.9159e-02,
          -1.8374e-01,  1.2236e-01],
         [-2.2462e-01,  9.0115e-02,  2.8768e-01,  ...,  1.6982e-01,
          -9.3370e-02,  2.2795e-01],
         [ 2.0865e-03,  2.9647e-01,  1.6888e-01,  ..., -6.0990e-01,
          -1.8006e-01, -2.5256e-03]]], device='cuda:0')
sp_para_output_t:  tensor([[[0.0622],
         [0.0617],
         [0.0857],
         [0.0266],
         [0.0280],
         [0.0604],
         [0.1017],
         [0.0530],
         [0.1001

validation_step
qid:  5add6cf45542992ae4cec55f
para_indexes:  tensor([ 26, 137, 196, 239, 293, 336, 526, 586, 728, 912], device='cuda:0')
sp_para_output:  tensor([[[ 1.3741e-01, -2.2975e-02,  4.2104e-02,  ..., -3.8840e-02,
           1.2574e-02,  1.5734e-01],
         [ 1.9183e-01, -1.4834e-01, -1.0433e-01,  ..., -3.9365e-01,
          -1.0634e-01, -4.1299e-02],
         [ 6.6892e-02, -1.1765e-01,  3.6179e-02,  ..., -3.7424e-01,
          -1.6971e-02,  8.4709e-02],
         ...,
         [-7.6853e-02,  1.9813e-02, -1.4031e-01,  ..., -4.5867e-01,
           4.2153e-04,  1.9975e-01],
         [-2.5010e-01, -2.0079e-01,  1.0868e-01,  ..., -3.7844e-01,
           5.7035e-02,  2.0358e-01],
         [ 9.4542e-02, -1.0734e-01,  1.3413e-01,  ...,  4.2740e-02,
          -1.2256e-02, -1.8257e-01]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0854],
         [ 0.0354],
         [ 0.0277],
         [ 0.0988],
         [ 0.0091],
         [ 0.0363],
         [ 0.0870],
         [ 0.0642],
   

para_sent_logits: tensor([-0.0847,  0.0133], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0130,  0.1273, -0.0453, -0.0081,  0.0540, -0.0714], device='cuda:0')
para_sents_offset: [0, 5, 8, 9, 10, 13, 15]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0593], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0130,  0.1273, -0.0453, -0.0081,  0.0540, -0.0714, -0.0593],
       device='cuda:0')
para_sents_offset: [0, 5, 8, 9, 10, 13, 15, 16]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0

sp_para_output:  tensor([[[-0.0886,  0.1126,  0.0972,  ..., -0.0935, -0.1898, -0.3011],
         [-0.0789,  0.2654,  0.0633,  ..., -0.2469, -0.1053,  0.0516],
         [-0.3692,  0.1034,  0.0896,  ..., -0.3481,  0.0249,  0.1426],
         ...,
         [-0.2562,  0.0704,  0.0425,  ..., -0.2236,  0.0759,  0.0248],
         [ 0.0206,  0.3958,  0.1362,  ..., -0.6927, -0.1344, -0.0736],
         [-0.0204,  0.4490, -0.0492,  ..., -0.3375,  0.0410, -0.0485]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0868],
         [ 0.0425],
         [ 0.0435],
         [ 0.0718],
         [ 0.0470],
         [ 0.0873],
         [ 0.1187],
         [-0.0216],
         [ 0.0789],
         [-0.0131]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3484], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3484], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 1

evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0145], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-1.3699e-01,  9.4795e-02,  1.0471e-01, -1.9193e-04, -5.5908e-02,
        -2.1057e-02,  2.4002e-02,  5.4321e-03, -2.0079e-01,  1.4549e-02],
       device='cuda:0')
para_sents_offset: [0, 3, 6, 10, 11, 12, 13, 15, 17, 22, 23]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], dev

validation_step
qid:  5ab92307554299753720f72d
para_indexes:  tensor([ 30, 153, 296, 398, 437, 471, 531, 556, 725], device='cuda:0')
sp_para_output:  tensor([[[ 8.7524e-02, -4.7156e-02,  1.9602e-01,  ..., -5.9746e-01,
          -1.0363e-01,  1.1952e-01],
         [ 6.5546e-02, -8.5790e-02, -1.3258e-02,  ..., -8.5940e-01,
          -3.7475e-01,  3.3070e-01],
         [ 2.7473e-04,  2.1565e-02,  9.4937e-02,  ..., -8.4289e-01,
           3.8380e-02,  7.9681e-02],
         ...,
         [ 1.1912e-01,  1.2737e-01,  6.4140e-02,  ..., -7.0254e-01,
          -1.2043e-01,  4.1480e-01],
         [ 5.4858e-02, -2.2721e-01,  1.4826e-01,  ..., -5.8528e-01,
          -4.7878e-02,  3.9114e-01],
         [-5.3976e-03,  2.4755e-01,  5.5360e-02,  ..., -4.6992e-01,
          -8.1369e-02,  1.2326e-01]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0436],
         [-0.0103],
         [ 0.0169],
         [ 0.0361],
         [ 0.0670],
         [ 0.1175],
         [ 0.0847],
         [ 0.0832],
        

para_sent_logits_sum: tensor([-0.1265, -0.1380, -0.1034,  0.0041, -0.1405], device='cuda:0')
para_sents_offset: [0, 3, 5, 7, 10, 13]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0707, -0.0390, -0.0823], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.1265, -0.1380, -0.1034,  0.0041, -0.1405, -0.1920], device='cuda:0')
para_sents_offset: [0, 3, 5, 7, 10, 13, 16]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor

validation_step
qid:  5ae1f2f3554299234fd0435a
para_indexes:  tensor([ 15, 105, 181, 224, 302, 396, 534, 592, 766, 866], device='cuda:0')
sp_para_output:  tensor([[[ 5.6440e-02,  6.3892e-02,  1.2368e-01,  ...,  4.1562e-02,
           5.1437e-02,  3.8538e-04],
         [ 3.4128e-03,  3.5832e-01, -1.8858e-01,  ..., -1.1979e-01,
           1.1102e-01, -2.0885e-02],
         [-6.1893e-03,  4.0727e-02, -3.3251e-02,  ...,  5.7559e-02,
          -1.5817e-01, -3.1293e-03],
         ...,
         [-7.0015e-02,  1.2736e-01,  8.8520e-02,  ..., -5.3072e-02,
          -2.2326e-04, -5.6629e-02],
         [ 7.9323e-03,  2.0937e-01,  2.9757e-02,  ..., -1.2533e-03,
          -1.9059e-01, -1.6928e-02],
         [ 2.3269e-02,  2.0649e-01,  3.0190e-02,  ..., -2.9540e-01,
          -2.2218e-01, -7.4753e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0079],
         [ 0.0448],
         [-0.0576],
         [ 0.0389],
         [-0.0641],
         [-0.0437],
         [-0.0365],
         [ 0.0092],
   

sp_para_output:  tensor([[[ 0.0989, -0.2645,  0.0868,  ...,  0.0185, -0.1861, -0.0929],
         [ 0.0516, -0.0680,  0.0549,  ..., -0.1414, -0.2868, -0.1023],
         [-0.1852,  0.0410, -0.0159,  ..., -0.3079, -0.4189, -0.1755],
         ...,
         [ 0.1061, -0.1000,  0.1569,  ..., -0.2806, -0.1743,  0.1832],
         [-0.1933,  0.0677, -0.0248,  ..., -0.3380, -0.3354, -0.2126],
         [-0.1067, -0.1772, -0.0443,  ..., -0.7242, -0.0096,  0.1401]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0217],
         [ 0.0513],
         [ 0.0226],
         [ 0.0413],
         [ 0.0237],
         [ 0.0914],
         [ 0.0845],
         [ 0.1063],
         [-0.0119],
         [ 0.0180]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3225], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3225], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 1, 2

validation_step
qid:  5a77a3905542992a6e59df40
para_indexes:  tensor([  24,  135,  296,  312,  470,  564,  725,  809,  920, 1017],
       device='cuda:0')
sp_para_output:  tensor([[[-0.1048,  0.2849, -0.0773,  ..., -0.3820,  0.1308,  0.1293],
         [ 0.0905, -0.2608,  0.1090,  ..., -0.5390, -0.1404,  0.0254],
         [-0.2131, -0.2150,  0.1091,  ..., -0.3487,  0.0371,  0.0284],
         ...,
         [-0.2080, -0.0798,  0.2865,  ..., -0.4717, -0.0930,  0.1223],
         [ 0.0801,  0.3558, -0.0141,  ..., -0.7729,  0.4033,  0.4755],
         [ 0.0892, -0.1356,  0.1320,  ..., -0.3070, -0.2432,  0.0996]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0757],
         [ 0.0043],
         [-0.0131],
         [ 0.0390],
         [ 0.0822],
         [ 0.0686],
         [ 0.0941],
         [-0.0108],
         [ 0.0159],
         [ 0.1107]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2705], device='cuda:0', dtype=torch.flo

validation_step
qid:  5abe9c1a5542993f32c2a183
para_indexes:  tensor([ 19,  75, 102, 161, 311, 347, 444, 527, 634], device='cuda:0')
sp_para_output:  tensor([[[ 0.0197,  0.2336,  0.0582,  ..., -0.2290,  0.0810, -0.0599],
         [-0.0425, -0.0377, -0.0501,  ..., -0.4643,  0.0546, -0.0630],
         [-0.0494, -0.0436, -0.1518,  ..., -0.2303, -0.0218, -0.0676],
         ...,
         [-0.3251, -0.0481,  0.1100,  ..., -0.0872, -0.1231, -0.1370],
         [-0.1432, -0.0273, -0.0243,  ..., -0.1383,  0.0887, -0.1922],
         [-0.1172,  0.2158, -0.1016,  ..., -0.2818, -0.1100, -0.1261]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0419],
         [0.0553],
         [0.0764],
         [0.0479],
         [0.0156],
         [0.0652],
         [0.1095],
         [0.0740],
         [0.0347]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3235], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -10000

sp_para_output:  tensor([[[ 0.1890, -0.0207,  0.1678,  ..., -0.5958, -0.2663,  0.3100],
         [ 0.0770,  0.2763, -0.1149,  ..., -0.9087,  0.0317,  0.2625],
         [ 0.2047,  0.2212,  0.1107,  ..., -0.4697, -0.3832,  0.1626],
         ...,
         [ 0.1437,  0.3594,  0.0548,  ..., -0.8839, -0.2566,  0.0308],
         [ 0.0883, -0.0289, -0.2284,  ..., -0.6505, -0.0265,  0.1139],
         [ 0.0831,  0.3043, -0.1678,  ..., -0.7939, -0.0627,  0.0115]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0609],
         [0.0969],
         [0.0262],
         [0.0475],
         [0.1191],
         [0.0414],
         [0.0595],
         [0.1083],
         [0.1157],
         [0.1466]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3037], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3037], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 0, 1, 1, 1,

validation_step
qid:  5ab8736455429916710eb058
para_indexes:  tensor([ 13, 151, 251, 285, 386, 489, 584, 619, 693, 748], device='cuda:0')
sp_para_output:  tensor([[[ 0.1258,  0.1165,  0.1954,  ..., -0.1137, -0.2593, -0.1064],
         [-0.1342, -0.0183,  0.0011,  ..., -0.3635, -0.0750,  0.1395],
         [-0.0992,  0.1416,  0.0823,  ..., -0.2699, -0.2647, -0.3227],
         ...,
         [-0.1085,  0.1943, -0.0042,  ..., -0.5303, -0.0241,  0.1250],
         [-0.5169,  0.3027,  0.0392,  ..., -0.2831, -0.0913, -0.1406],
         [-0.1416,  0.1207,  0.1164,  ..., -0.3785,  0.1199,  0.1223]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0315],
         [ 0.0039],
         [ 0.0530],
         [-0.0510],
         [-0.0143],
         [ 0.0625],
         [ 0.1133],
         [ 0.0549],
         [ 0.0064],
         [ 0.0085]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3113], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5ae234005542994d89d5b392
para_indexes:  tensor([ 21,  75, 161, 228, 279, 340, 447, 505, 560, 625], device='cuda:0')
sp_para_output:  tensor([[[-0.0652,  0.0226,  0.0521,  ..., -0.2531, -0.1969, -0.0966],
         [-0.0851, -0.2822,  0.0287,  ..., -0.4462, -0.1268,  0.0172],
         [ 0.0311,  0.2396,  0.0886,  ..., -0.3029,  0.1484, -0.2886],
         ...,
         [ 0.2200, -0.1182, -0.0384,  ..., -0.2833, -0.1553, -0.3050],
         [ 0.2415, -0.3283,  0.1829,  ..., -0.1681, -0.1777,  0.2655],
         [ 0.0835, -0.1162, -0.2259,  ..., -0.3714,  0.0187, -0.1619]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0387],
         [ 0.0959],
         [-0.0182],
         [ 0.0134],
         [-0.0635],
         [ 0.0100],
         [ 0.0006],
         [ 0.0253],
         [-0.0330],
         [ 0.0453]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3408], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5add44c25542997545bbbd0a
para_indexes:  tensor([ 23, 106, 231, 251, 351, 420, 513, 594, 755, 818], device='cuda:0')
sp_para_output:  tensor([[[ 0.3604, -0.1009,  0.1266,  ..., -0.3313, -0.2993, -0.0050],
         [ 0.1101,  0.2233, -0.2272,  ..., -0.5466, -0.1801,  0.1109],
         [ 0.1496,  0.1993,  0.0070,  ..., -0.5485, -0.3209, -0.0147],
         ...,
         [ 0.1472,  0.1374,  0.1287,  ..., -0.5257, -0.0584,  0.1821],
         [ 0.1339,  0.1326,  0.0750,  ..., -0.5167, -0.0929,  0.0729],
         [ 0.0983,  0.0699,  0.1908,  ..., -0.3357, -0.2196, -0.3456]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0307],
         [ 0.1096],
         [ 0.0146],
         [-0.0135],
         [ 0.0130],
         [ 0.0337],
         [-0.0184],
         [ 0.0382],
         [-0.0138],
         [-0.0173]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3308], device='cuda:0', dtype=torch.float16), 'start_log

para_sent_logits: tensor([0.0552, 0.0522], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0098,  0.0190, -0.0090, -0.0031, -0.1347, -0.1074, -0.0047,  0.0210,
         0.0784,  0.1075], device='cuda:0')
para_sents_offset: [0, 1, 6, 8, 9, 13, 16, 17, 18, 21, 23]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64), 9: tensor([], device='cuda:0', dtype=torch.int64)}
sp_para_pred: tensor([9, 8], device='cuda:0')
validation_step
qid:  5a7b1c745542992d025e6777
para_indexes:  tensor([ 22,  68,  92, 142, 183, 255, 427, 525, 581], device='cuda:0')
sp_

validation_step
qid:  5a90b3c65542990a984936b7
para_indexes:  tensor([ 18, 104, 171, 372, 433, 483, 518, 661, 715, 803], device='cuda:0')
sp_para_output:  tensor([[[ 0.1966,  0.2527,  0.0215,  ...,  0.0543,  0.3862,  0.1404],
         [ 0.1326, -0.0552,  0.0797,  ..., -0.2878, -0.1505,  0.0692],
         [ 0.2152,  0.1840,  0.1651,  ..., -0.4655, -0.4405,  0.2476],
         ...,
         [ 0.0699, -0.0144,  0.1005,  ..., -0.4062, -0.2554,  0.0150],
         [-0.0747, -0.0141,  0.1610,  ..., -0.2356, -0.1470,  0.0916],
         [ 0.0973,  0.2024,  0.0724,  ..., -0.4327, -0.2518,  0.0890]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0800],
         [ 0.1072],
         [ 0.0416],
         [-0.0244],
         [ 0.0312],
         [ 0.0543],
         [ 0.0335],
         [ 0.0532],
         [ 0.0525],
         [-0.0638]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3865], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 0.1454, -0.0083,  0.3372,  ..., -0.2072, -0.1341,  0.3722],
         [ 0.0173,  0.4042, -0.1151,  ..., -0.5196,  0.3026,  0.1484],
         [-0.1376,  0.0413, -0.1867,  ..., -0.1265,  0.0321,  0.1552],
         ...,
         [ 0.0299,  0.1659, -0.1744,  ..., -0.3473, -0.0859,  0.2067],
         [-0.0477,  0.0482, -0.0999,  ..., -0.3588, -0.2895,  0.0378],
         [-0.0486,  0.0391, -0.0265,  ..., -0.4622, -0.2352,  0.1358]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0492],
         [ 0.0482],
         [ 0.0143],
         [ 0.0156],
         [ 0.0673],
         [ 0.0140],
         [-0.0152],
         [ 0.0538],
         [-0.0029]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2700], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2700], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 

sp_para_output:  tensor([[[ 0.1313, -0.1624,  0.0937,  ..., -0.1684,  0.0300,  0.0305],
         [-0.0742,  0.2214, -0.1419,  ...,  0.4830,  0.1201,  0.1483],
         [-0.0752,  0.3943,  0.0606,  ..., -0.4189,  0.0787,  0.1441],
         ...,
         [-0.2537,  0.2076,  0.0106,  ...,  0.1021,  0.0899, -0.0852],
         [-0.0218,  0.3077, -0.2113,  ..., -0.0186,  0.0132, -0.3373],
         [-0.4247, -0.0128, -0.0208,  ...,  0.0250,  0.2107, -0.1962]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0078],
         [-0.0443],
         [-0.0290],
         [-0.0188],
         [ 0.0737],
         [ 0.0726],
         [ 0.0591],
         [-0.0046],
         [-0.0117],
         [-0.0508]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3213], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3213], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 0

validation_step
qid:  5a82a1ae55429954d2e2eb89
para_indexes:  tensor([ 28,  79, 117, 220, 268, 314, 427, 475, 555, 619], device='cuda:0')
sp_para_output:  tensor([[[ 0.3141, -0.0509, -0.0008,  ...,  0.0756, -0.4098, -0.2204],
         [ 0.1730,  0.0654, -0.0908,  ..., -0.2364, -0.2231, -0.2072],
         [-0.0092, -0.2298, -0.2012,  ...,  0.1605, -0.4554, -0.2294],
         ...,
         [-0.1613, -0.0171, -0.1770,  ...,  0.0706, -0.3747, -0.4505],
         [ 0.0641,  0.3387,  0.0607,  ..., -0.1051, -0.0978,  0.0686],
         [-0.0292,  0.1124, -0.2084,  ..., -0.1531,  0.0928,  0.0152]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0306],
         [ 0.0307],
         [-0.0142],
         [ 0.0199],
         [-0.0466],
         [-0.0387],
         [ 0.0361],
         [ 0.0344],
         [-0.0104],
         [ 0.0890]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3582], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 0.0994,  0.0013,  0.1481,  ..., -0.6700, -0.2920,  0.1717],
         [ 0.0176, -0.1309,  0.0912,  ..., -0.3636, -0.3804, -0.0773],
         [-0.2311,  0.2037,  0.1370,  ..., -0.4344, -0.3575, -0.0349],
         ...,
         [ 0.1968, -0.1876,  0.2714,  ..., -0.3881, -0.2349,  0.2511],
         [ 0.1322, -0.1984,  0.1234,  ..., -0.7340, -0.1098,  0.0628],
         [ 0.0397, -0.1002, -0.0371,  ..., -0.3087, -0.4066,  0.0111]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0179],
         [ 0.1023],
         [ 0.0455],
         [ 0.0130],
         [-0.0130],
         [ 0.0243],
         [ 0.0204],
         [-0.0026],
         [ 0.0533],
         [ 0.0022]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2822], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2822], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 1

sp_para_output:  tensor([[[ 0.2013, -0.0573,  0.1920,  ..., -1.0357,  0.0081,  0.3250],
         [ 0.0480, -0.0024,  0.1255,  ..., -0.7240,  0.0105, -0.0766],
         [-0.0053,  0.2024, -0.0121,  ..., -0.6851, -0.2961, -0.1543],
         ...,
         [-0.0251,  0.3071, -0.0477,  ..., -0.7947, -0.1355,  0.1044],
         [-0.0248,  0.0041,  0.1060,  ..., -0.6783, -0.0346, -0.0438],
         [-0.0026,  0.2568, -0.0501,  ..., -0.4149,  0.0940,  0.0475]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0480],
         [ 0.0768],
         [ 0.0746],
         [-0.0425],
         [ 0.0006],
         [-0.0098],
         [ 0.0656],
         [ 0.0449],
         [ 0.0359],
         [-0.0088]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3130], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3130], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 1

evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([2], device='cuda:0'), 2: tensor([0], device='cuda:0'), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 7.9880e-03, -5.2765e-02, -2.0233e-02,  6.3777e-06, -1.0719e-02],
       device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0457,  0.2500,  0.2152, -0.0193,  0.0257, -0.0757], device='cuda:0')
para_sents_offset: [0, 1, 6, 9, 11, 12, 17]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([2], device='cuda:0'), 2: tensor([0], device='cuda:0'), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0624, 0.0391], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0457,  0.2500,  0.2152, -0.0193,  0.0257, -0.0757,  0.1015],
       de

        -0.0100], device='cuda:0')
para_sents_offset: [0, 4, 6, 9, 11, 15, 20, 21, 25, 26]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([1], device='cuda:0'), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([0, 1], device='cuda:0'), 8: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0770, 0.0632, 0.0110], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.1689,  0.1771,  0.1656,  0.1642, -0.0087, -0.1664, -0.0375,  0.2548,
        -0.0100,  0.1512], device='cuda:0')
para_sents_offset: [0, 4, 6, 9, 11, 15, 20, 21, 25, 26, 29]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch

validation_step
qid:  5a8b84905542997f31a41d5d
para_indexes:  tensor([ 22,  99, 188, 227, 321, 334, 415, 471, 608, 704], device='cuda:0')
sp_para_output:  tensor([[[ 0.1895, -0.5224,  0.3874,  ...,  0.2619, -0.0602,  0.3770],
         [-0.0537, -0.2250, -0.1450,  ..., -0.0845,  0.1165,  0.2559],
         [ 0.1193, -0.3607,  0.0734,  ...,  0.4867, -0.3566, -0.1769],
         ...,
         [ 0.0450, -0.0974,  0.0013,  ..., -0.3896, -0.1444,  0.0788],
         [ 0.0700, -0.5518,  0.1547,  ...,  0.1704, -0.1654,  0.1106],
         [ 0.0199, -0.2456,  0.0211,  ..., -0.3340, -0.2193,  0.1291]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0068],
         [ 0.0705],
         [ 0.0493],
         [ 0.0522],
         [ 0.0234],
         [-0.0268],
         [ 0.0735],
         [ 0.1127],
         [ 0.0614],
         [ 0.0431]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3484], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5ab3fa095542992ade7c6f0a
para_indexes:  tensor([  39,   83,  142,  243,  587,  662,  692,  805,  958, 1135],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.0159, -0.1180,  0.0585,  ..., -0.4974, -0.2449,  0.0265],
         [-0.1763,  0.4186, -0.0148,  ..., -0.2355, -0.1582, -0.1122],
         [ 0.0799, -0.0611,  0.0786,  ..., -0.5455, -0.4457,  0.0821],
         ...,
         [-0.1991,  0.0250,  0.0937,  ..., -0.3081, -0.2320,  0.0851],
         [-0.1271, -0.1184, -0.0778,  ..., -0.3722, -0.1965,  0.1294],
         [ 0.1319, -0.1297, -0.1351,  ..., -0.5984, -0.3127, -0.1165]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0007],
         [ 0.1364],
         [ 0.0048],
         [-0.0063],
         [ 0.0365],
         [ 0.0238],
         [ 0.0222],
         [-0.0270],
         [ 0.0479],
         [ 0.0657]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3635], device='cuda:0', dtype=torch.flo

         0.0031,  0.0929], device='cuda:0')
para_sents_offset: [0, 2, 4, 7, 11, 14, 16, 18, 20, 21, 23]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([0], device='cuda:0'), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64), 9: tensor([], device='cuda:0', dtype=torch.int64)}
sp_para_pred: tensor([7, 9], device='cuda:0')
validation_step
qid:  5ac0d732554299294b219030
para_indexes:  tensor([ 19,  91, 170, 213, 268, 344, 394, 478, 530, 626], device='cuda:0')
sp_para_output:  tensor([[[ 0.0426, -0.1692,  0.1321,  ..., -0.5185, -0.2807, -0.1342],
         [-0.2330, -0.0613,  0.0607,  ..., -0.5570, -0.1357, -0.1532],
         [-0.3508, -0.1152, -0.2025,  .

validation_step
qid:  5a7c17c95542990527d5544d
para_indexes:  tensor([  21,  175,  248,  397,  487,  571,  756,  813,  886, 1043],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.1670, -0.0481,  0.0904,  ..., -0.6095, -0.2765,  0.0563],
         [-0.0128, -0.0741, -0.1459,  ..., -0.4037, -0.2354, -0.0217],
         [-0.0815, -0.0821, -0.3544,  ..., -0.0704,  0.0061,  0.0827],
         ...,
         [-0.0931, -0.0662, -0.0688,  ..., -0.5865, -0.0048,  0.0486],
         [ 0.0061, -0.0254, -0.0055,  ..., -0.5540,  0.0602,  0.2151],
         [ 0.0424,  0.0288, -0.1626,  ..., -0.6595, -0.3485,  0.0470]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0773],
         [ 0.0693],
         [ 0.0461],
         [ 0.0074],
         [ 0.0588],
         [ 0.1198],
         [ 0.0536],
         [-0.0173],
         [-0.0009],
         [ 0.1064]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3259], device='cuda:0', dtype=torch.flo

validation_step
qid:  5a89fea655429970aeb701eb
para_indexes:  tensor([ 20, 134, 191, 249, 306, 373, 486, 558, 636, 697], device='cuda:0')
sp_para_output:  tensor([[[ 4.9531e-03,  1.1851e-01,  1.4918e-01,  ..., -2.4852e-01,
          -3.4937e-01, -2.8875e-01],
         [ 4.2597e-02, -8.9461e-02,  1.5662e-02,  ..., -4.7275e-01,
          -3.6217e-01, -4.4987e-02],
         [-3.4989e-02,  1.3462e-04,  2.0929e-01,  ..., -4.7340e-01,
          -4.9127e-01, -4.4357e-02],
         ...,
         [ 1.0822e-01, -8.1891e-02,  1.0015e-01,  ..., -8.0093e-01,
           3.2312e-02, -1.1610e-01],
         [-1.3847e-01, -1.3700e-02, -4.2548e-02,  ..., -4.6028e-01,
           4.9227e-02,  2.5829e-01],
         [-9.3149e-02,  1.2800e-01,  2.9398e-02,  ..., -1.5644e-01,
          -4.8156e-01, -4.5496e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[-0.0218],
         [-0.0295],
         [ 0.0507],
         [-0.0637],
         [-0.0867],
         [-0.0201],
         [ 0.0598],
         [ 0.0029],
   

sp_para_output:  tensor([[[ 0.0517,  0.1765,  0.1443,  ..., -0.6312, -0.0239,  0.0059],
         [-0.0859,  0.0520,  0.0361,  ...,  0.0236,  0.0306,  0.2211],
         [ 0.1289, -0.0129, -0.0909,  ..., -0.4601, -0.2912, -0.3348],
         ...,
         [-0.0475,  0.1506,  0.1782,  ..., -0.4178,  0.0720, -0.0385],
         [-0.0640, -0.0137,  0.0581,  ..., -0.4761, -0.2490, -0.1841],
         [ 0.0278,  0.0205,  0.1456,  ..., -0.6987, -0.2342,  0.2341]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0095],
         [ 0.0243],
         [-0.0026],
         [-0.0059],
         [-0.0936],
         [-0.0412],
         [-0.0275],
         [-0.0336],
         [ 0.0309],
         [ 0.0457]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.5107], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.5107], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 2, 3, 4

sp_para_output:  tensor([[[-0.3092,  0.4562, -0.0070,  ..., -0.8602, -0.2150,  0.1810],
         [ 0.0759,  0.1621,  0.2847,  ...,  0.1747, -0.0746,  0.1932],
         [-0.0322, -0.1040,  0.0570,  ..., -0.2094, -0.2363,  0.2340],
         ...,
         [ 0.0104,  0.1115,  0.1988,  ..., -0.1285, -0.2532, -0.0762],
         [-0.3062, -0.0305,  0.4270,  ..., -0.0185, -0.0874,  0.2246],
         [ 0.0861, -0.0227,  0.1586,  ..., -0.0175, -0.2108,  0.1892]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.1061],
         [ 0.0269],
         [-0.0123],
         [-0.0311],
         [ 0.0879],
         [ 0.1038],
         [ 0.0636],
         [-0.0173],
         [ 0.0304]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2195], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2195], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 

validation_step
qid:  5ae1d1a55542997f29b3c138
para_indexes:  tensor([ 40,  95, 227, 313, 410, 457, 525, 729, 804, 932], device='cuda:0')
sp_para_output:  tensor([[[-0.0856, -0.0473,  0.2989,  ...,  0.0827, -0.3043, -0.0160],
         [-0.0549,  0.2162, -0.0636,  ..., -0.5217,  0.0645,  0.1185],
         [-0.2208,  0.1670,  0.1571,  ..., -0.6975, -0.0621,  0.1996],
         ...,
         [-0.4787,  0.1065,  0.0207,  ..., -0.3366, -0.0494,  0.0873],
         [-0.3178,  0.0602,  0.0263,  ..., -0.1732, -0.1965,  0.0294],
         [-0.1380,  0.2904, -0.0810,  ..., -0.1808,  0.2103, -0.1976]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0259],
         [ 0.0403],
         [ 0.0082],
         [-0.0174],
         [ 0.0145],
         [ 0.0519],
         [ 0.0330],
         [ 0.0700],
         [-0.0914],
         [ 0.0187]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2720], device='cuda:0', dtype=torch.float16), 'start_log

para_sent_logits: tensor([-0.0532, -0.0076], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0592,  0.0434,  0.0420,  0.3543, -0.0058, -0.0196,  0.1160,  0.4869,
         0.1123, -0.0608], device='cuda:0')
para_sents_offset: [0, 2, 3, 5, 12, 14, 22, 25, 32, 35, 37]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([0], device='cuda:0'), 8: tensor([], device='cuda:0', dtype=torch.int64), 9: tensor([], device='cuda:0', dtype=torch.int64)}
sp_para_pred: tensor([7, 3], device='cuda:0')
validation_step
qid:  5ac213805542992f1f2b37e7
para_indexes:  tensor([  27,  122,  276,  354,  432,  627,  789,  959, 1403, 1597],
       device='cuda:

sp_para_output:  tensor([[[-0.0939, -0.1998,  0.0947,  ..., -0.3575, -0.0868, -0.2070],
         [-0.2895, -0.4716,  0.0549,  ..., -0.0463, -0.1303, -0.3750],
         [-0.1221, -0.1467,  0.1193,  ..., -0.4197, -0.1288, -0.3323],
         ...,
         [-0.1462, -0.1597,  0.0138,  ..., -0.2815,  0.0366, -0.1063],
         [-0.0904, -0.1312,  0.0241,  ..., -0.5428,  0.0651, -0.0782],
         [-0.1202, -0.5486,  0.0663,  ..., -0.1428,  0.0395, -0.2252]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0724],
         [ 0.0325],
         [ 0.0069],
         [-0.0135],
         [ 0.0354],
         [-0.0206],
         [ 0.0339],
         [ 0.0244],
         [ 0.0428]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3992], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3992], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 2, 3, 4, 5, 5, 6, 7, 8]
par

sp_para_output:  tensor([[[ 0.0564,  0.0374,  0.1394,  ..., -0.0708, -0.0702,  0.0540],
         [ 0.0496, -0.0702, -0.0457,  ..., -0.4132, -0.2315, -0.0888],
         [-0.0270,  0.5017,  0.0894,  ..., -0.3687, -0.2032, -0.2541],
         ...,
         [-0.0829, -0.0670,  0.2407,  ...,  0.0807, -0.1361, -0.1126],
         [-0.3205, -0.1430,  0.3832,  ..., -0.7084, -0.0306, -0.1510],
         [-0.0255,  0.1293,  0.0936,  ..., -0.3038, -0.1178, -0.0692]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0214],
         [ 0.0232],
         [ 0.0568],
         [-0.0104],
         [ 0.0753],
         [ 0.0310],
         [ 0.1168],
         [-0.0273],
         [ 0.0183],
         [ 0.0116]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3318], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3318], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 0

validation_step
qid:  5a7fb23f5542994857a767be
para_indexes:  tensor([ 20, 122, 185, 237, 270, 340, 413, 527, 560, 679], device='cuda:0')
sp_para_output:  tensor([[[ 0.1833,  0.0295,  0.3117,  ..., -0.0960,  0.0907, -0.1644],
         [ 0.1573, -0.0063,  0.1243,  ..., -0.4374,  0.0396, -0.4406],
         [ 0.0614,  0.1056,  0.2207,  ..., -0.2796, -0.0194, -0.2150],
         ...,
         [ 0.2832, -0.0034,  0.0767,  ..., -0.0705,  0.1548, -0.1686],
         [ 0.3033,  0.0446,  0.2943,  ..., -0.3208, -0.0751, -0.0276],
         [ 0.1710,  0.0328,  0.0563,  ..., -0.4414, -0.0687, -0.2209]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0446],
         [-0.0058],
         [-0.0366],
         [-0.0392],
         [-0.0371],
         [ 0.0140],
         [-0.0131],
         [-0.0077],
         [-0.0449],
         [-0.0372]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3931], device='cuda:0', dtype=torch.float16), 'start_log

para_sent_logits_sum: tensor([-0.2084,  0.1319,  0.0008, -0.0363], device='cuda:0')
para_sents_offset: [0, 2, 7, 10, 11]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0098,  0.0271,  0.0403, -0.0104], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([-0.2084,  0.1319,  0.0008, -0.0363,  0.0472], device='cuda:0')
para_sents_offset: [0, 2, 7, 10, 11, 15]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0425, -0.0685, -0.0591, -0.0366,  0.0189, -0.0626], device='cuda:0',
       dtype=torch.float16)
para_sen

sp_para_output:  tensor([[[ 0.2079, -0.1120,  0.3705,  ..., -0.2852, -0.1982,  0.1075],
         [ 0.3091, -0.0147,  0.2598,  ..., -0.5448, -0.1837, -0.1377],
         [ 0.2751, -0.1348,  0.2897,  ..., -0.6261, -0.2502, -0.2961],
         ...,
         [ 0.2187,  0.2341,  0.1792,  ..., -0.4370, -0.0518, -0.0852],
         [ 0.3762,  0.0783,  0.2381,  ..., -0.5831, -0.2092,  0.0159],
         [ 0.3334,  0.0717,  0.0364,  ..., -0.6497, -0.3832, -0.1547]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.1057],
         [ 0.0645],
         [-0.0112],
         [-0.0141],
         [ 0.0304],
         [ 0.0869],
         [ 0.1040],
         [ 0.1351],
         [-0.0692],
         [ 0.0152]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3267], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3267], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 0

sp_para_output:  tensor([[[ 0.0513, -0.3604, -0.0166,  ..., -0.2268,  0.0875,  0.0596],
         [-0.2831, -0.4462, -0.0374,  ...,  0.0444,  0.1780,  0.1785],
         [-0.1892, -0.4593, -0.0850,  ..., -0.2839,  0.2197,  0.3402],
         ...,
         [-0.4905, -0.2104, -0.1201,  ..., -0.1908,  0.1992,  0.0550],
         [-0.3699, -0.2717, -0.1884,  ..., -0.0389,  0.1401, -0.1175],
         [-0.3491, -0.2420, -0.1090,  ..., -0.2269,  0.0859, -0.0588]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0294],
         [0.0483],
         [0.1237],
         [0.0849],
         [0.0679],
         [0.0188],
         [0.0650],
         [0.0087],
         [0.0654]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3936], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3936], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 2, 2, 2, 3, 4, 5, 6, 7, 8, 8]
para_s

validation_step
qid:  5a7ae2f2554299042af8f6aa
para_indexes:  tensor([ 22, 148, 247, 314, 420, 465, 571, 640, 741, 820], device='cuda:0')
sp_para_output:  tensor([[[-0.1591,  0.2100, -0.1130,  ..., -0.0816,  0.0308,  0.0225],
         [ 0.0232, -0.0706,  0.1603,  ..., -0.0383, -0.2835,  0.0896],
         [ 0.0413, -0.1151,  0.2795,  ...,  0.0911, -0.0325,  0.2393],
         ...,
         [-0.2522,  0.1724, -0.0937,  ..., -0.0726,  0.2861,  0.0050],
         [-0.0862,  0.1649,  0.1590,  ..., -0.1209, -0.1984,  0.2301],
         [-0.0582, -0.0132,  0.1749,  ...,  0.2160, -0.2638, -0.1575]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0295],
         [ 0.0206],
         [ 0.0268],
         [-0.0261],
         [ 0.0729],
         [ 0.1053],
         [ 0.0518],
         [ 0.0414],
         [ 0.0403],
         [ 0.0062]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3022], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 0.1067,  0.0089,  0.0309,  ..., -0.2050, -0.0993, -0.1271],
         [-0.2088, -0.1612,  0.0491,  ..., -0.3416,  0.0156,  0.1870],
         [-0.1230, -0.2737, -0.0521,  ..., -0.4629,  0.0118, -0.1422],
         ...,
         [-0.0941, -0.0859, -0.1361,  ..., -0.0418, -0.1537, -0.2220],
         [-0.3015,  0.0846, -0.0639,  ..., -0.3668,  0.0599,  0.2529],
         [-0.3263, -0.0103,  0.0777,  ..., -0.5348, -0.0862,  0.0033]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0264],
         [ 0.0195],
         [ 0.0298],
         [-0.0489],
         [ 0.0311],
         [-0.0262],
         [-0.0281],
         [ 0.0020]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3447], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3447], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 5, 6, 6, 7]
para_sent_l

validation_step
qid:  5adf5daf5542995534e8c79d
para_indexes:  tensor([ 19, 110, 249, 258, 282, 329, 473, 507, 605, 694], device='cuda:0')
sp_para_output:  tensor([[[-0.0933,  0.2650, -0.1286,  ..., -0.8079, -0.2224,  0.1257],
         [ 0.1424, -0.2333,  0.0664,  ..., -0.3127,  0.0161,  0.0605],
         [ 0.0128, -0.1320,  0.1116,  ..., -0.3961, -0.0922,  0.0127],
         ...,
         [ 0.1564,  0.1659,  0.1510,  ..., -0.1758, -0.1942,  0.0970],
         [-0.1310,  0.0367,  0.2102,  ..., -0.5496,  0.0630,  0.1426],
         [-0.0858,  0.0609,  0.2894,  ..., -0.3139, -0.0973,  0.0245]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0209],
         [ 0.0340],
         [-0.0293],
         [-0.0541],
         [-0.0505],
         [ 0.0008],
         [ 0.0864],
         [ 0.0637],
         [ 0.0589],
         [ 0.0010]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.4231], device='cuda:0', dtype=torch.float16), 'start_log

para_sent_logits: tensor([ 0.0230, -0.0148, -0.0083], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-7.7637e-02,  2.6718e-02, -9.9182e-05], device='cuda:0')
para_sents_offset: [0, 2, 3, 6]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0029, -0.0520, -0.0545, -0.0218], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([-7.7637e-02,  2.6718e-02, -9.9182e-05, -1.2541e-01], device='cuda:0')
para_sents_offset: [0, 2, 3, 6, 10]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0452, -0.1014], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-7.7637e-02,  2.6718e-02, -9.91

validation_step
qid:  5a7103b35542994082a3e4c0
para_indexes:  tensor([ 20,  43, 103, 177, 276, 321, 392, 461, 511, 586], device='cuda:0')
sp_para_output:  tensor([[[-0.1229, -0.0395,  0.1905,  ..., -0.1672,  0.2724,  0.0092],
         [-0.0454, -0.1494,  0.1163,  ..., -0.2376,  0.1334, -0.1353],
         [ 0.0295,  0.0251,  0.1390,  ..., -0.4588, -0.0198, -0.0037],
         ...,
         [-0.2922, -0.3705,  0.1898,  ..., -0.0471,  0.1277, -0.4124],
         [-0.2016,  0.0214, -0.1320,  ..., -0.4454,  0.4643,  0.3620],
         [-0.1368, -0.0670,  0.1107,  ..., -0.1731,  0.1129,  0.2662]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0358],
         [ 0.0240],
         [ 0.0766],
         [ 0.0176],
         [ 0.0056],
         [ 0.0017],
         [-0.0131],
         [ 0.0622],
         [-0.0176],
         [ 0.0479]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3606], device='cuda:0', dtype=torch.float16), 'start_log

para_sent_logits_sum: tensor([0.3427, 0.4680, 0.1071, 0.2060, 0.3877, 0.5519], device='cuda:0')
para_sents_offset: [0, 6, 10, 13, 18, 24, 28]
evidence_candidates: {0: tensor([3, 5], device='cuda:0'), 1: tensor([0, 1, 2, 3], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([2], device='cuda:0'), 5: tensor([1, 3], device='cuda:0')}
para_sent_logits: tensor([0.1261, 0.0624], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.3427, 0.4680, 0.1071, 0.2060, 0.3877, 0.5519, 0.1885],
       device='cuda:0')
para_sents_offset: [0, 6, 10, 13, 18, 24, 28, 30]
evidence_candidates: {0: tensor([3, 5], device='cuda:0'), 1: tensor([0, 1, 2, 3], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([2], device='cuda:0'), 5: tensor([1, 3], device='cuda:0'), 6: tensor([0], device='cuda:0')}
para_sent_logits: tensor([0.0707, 0.0074,

validation_step
qid:  5a74872655429979e2882978
para_indexes:  tensor([ 21,  57, 121, 168, 226, 298, 345, 371, 405, 486], device='cuda:0')
sp_para_output:  tensor([[[-4.6929e-05,  3.4257e-01, -2.7298e-02,  ..., -3.8620e-01,
           8.2783e-02,  1.2999e-01],
         [-1.0141e-01,  2.0204e-02,  4.7029e-02,  ..., -6.8056e-01,
          -1.5201e-01,  1.6021e-01],
         [ 1.8065e-01, -9.9684e-02, -3.6954e-01,  ..., -2.3794e-01,
          -4.5552e-01, -3.0404e-01],
         ...,
         [ 1.1499e-02,  1.6067e-02,  2.3371e-02,  ..., -5.5535e-01,
          -7.5662e-02, -8.0499e-02],
         [-4.7431e-02,  2.3148e-02,  1.8864e-02,  ..., -7.5687e-01,
           8.0645e-02,  1.8326e-01],
         [-4.3300e-02, -4.7189e-02,  6.8461e-03,  ..., -4.9334e-01,
          -1.7535e-01, -2.1274e-01]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0024],
         [ 0.1024],
         [-0.0115],
         [-0.0076],
         [ 0.0078],
         [-0.0473],
         [ 0.0323],
         [-0.0141],
   

evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64), 9: tensor([], device='cuda:0', dtype=torch.int64)}
sp_para_pred: tensor([5, 1], device='cuda:0')
validation_step
qid:  5a8f608d5542992414482a94
para_indexes:  tensor([ 16,  89, 131, 202, 234, 340, 483, 525, 566, 636], device='cuda:0')
sp_para_output:  tensor([[[ 0.1366, -0.2924,  0.2290,  ...,  0.0024, -0.1381,  0.2288],
         [-0.0270,  0.1492,  0.2725,  ..., -0.6561,  0.1370,  0.1053],
         [ 0.0754,  0.0412,  0.0856,  ..., -0.5326,  0.0671,  0.1568],
         ...,
         [ 0.1737,  0.0772,  0.2227,  ..

validation_step
qid:  5a78f59755429970f5fffdf8
para_indexes:  tensor([ 20,  65, 168, 254, 315, 454, 554, 685, 834, 931], device='cuda:0')
sp_para_output:  tensor([[[ 0.1287, -0.1984,  0.1425,  ..., -0.5114, -0.0577,  0.0385],
         [-0.2459, -0.4523,  0.2672,  ..., -0.0229,  0.1146,  0.1356],
         [ 0.0453,  0.0215,  0.0694,  ..., -0.3840, -0.1859,  0.1507],
         ...,
         [-0.0204,  0.0117,  0.0108,  ..., -0.4883, -0.3052, -0.3626],
         [ 0.1659,  0.0212, -0.1071,  ..., -0.4327, -0.1541,  0.0151],
         [ 0.1183,  0.0159, -0.1162,  ..., -0.2569, -0.2425,  0.0459]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0215],
         [ 0.0304],
         [-0.0147],
         [-0.0064],
         [-0.0416],
         [ 0.0249],
         [-0.0130],
         [ 0.1006],
         [ 0.0391],
         [-0.0144]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3623], device='cuda:0', dtype=torch.float16), 'start_log

       device='cuda:0')
para_sents_offset: [0, 1, 2, 4, 5, 9, 11, 12]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0089,  0.0321], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0420,  0.0195,  0.0521,  0.0612,  0.1311, -0.0134, -0.0080,  0.0233],
       device='cuda:0')
para_sents_offset: [0, 1, 2, 4, 5, 9, 11, 12, 14]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], 

sp_para_output:  tensor([[[ 0.2267, -0.0333, -0.0762,  ..., -0.4191,  0.1336,  0.2669],
         [-0.0459, -0.0081,  0.1998,  ..., -0.3993, -0.0129,  0.2066],
         [ 0.0683, -0.0765,  0.1840,  ...,  0.0761, -0.3181, -0.1674],
         ...,
         [-0.1764, -0.0069,  0.0946,  ...,  0.0990, -0.0583,  0.0774],
         [ 0.0602, -0.1147,  0.0779,  ...,  0.0874, -0.0183,  0.0548],
         [-0.0392,  0.1864,  0.1590,  ..., -0.2408, -0.2267, -0.1425]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0371],
         [ 0.1377],
         [ 0.0857],
         [-0.0372],
         [ 0.0441],
         [-0.0447],
         [ 0.0347],
         [ 0.0936],
         [ 0.0627],
         [ 0.0155]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2534], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2534], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 1

validation_step
qid:  5a825da055429954d2e2eb17
para_indexes:  tensor([  26,   52,  288,  434,  500,  566,  670,  854,  956, 1061],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.1237,  0.0168, -0.0115,  ..., -0.2094, -0.2607, -0.0140],
         [-0.0230, -0.3352,  0.1949,  ..., -0.0767, -0.1370,  0.0335],
         [-0.1344, -0.0437,  0.2577,  ..., -0.2631,  0.0434, -0.0646],
         ...,
         [-0.0075, -0.2538, -0.0129,  ..., -0.2830, -0.0852, -0.0626],
         [ 0.0425, -0.0267,  0.0851,  ..., -0.0998, -0.0664,  0.1021],
         [ 0.0678, -0.0759,  0.1355,  ..., -0.1175, -0.1184, -0.0044]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0956],
         [ 0.0303],
         [-0.0543],
         [ 0.0710],
         [ 0.0859],
         [ 0.0820],
         [ 0.0083],
         [ 0.0444],
         [ 0.1315],
         [ 0.0205]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3335], device='cuda:0', dtype=torch.flo

       dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0002,  0.1593,  0.0828,  0.0263,  0.0167,  0.1370], device='cuda:0')
para_sents_offset: [0, 2, 6, 8, 9, 11, 16]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0030,  0.0051], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0002,  0.1593,  0.0828,  0.0263,  0.0167,  0.1370,  0.0021],
       device='cuda:0')
para_sents_offset: [0, 2, 6, 8, 9, 11, 16, 18]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cud

sp_para_output:  tensor([[[ 0.0363,  0.0525,  0.3606,  ..., -0.0626, -0.3284, -0.3593],
         [ 0.0255,  0.1332,  0.0151,  ..., -0.4749,  0.0469, -0.0469],
         [-0.1138, -0.0184,  0.1898,  ..., -0.3584, -0.1696,  0.0780],
         ...,
         [-0.0380,  0.2262,  0.0634,  ..., -0.7601, -0.1936, -0.1613],
         [ 0.1724, -0.0167,  0.1072,  ..., -0.4793, -0.1366,  0.1344],
         [-0.0302, -0.1699,  0.3178,  ..., -0.0665, -0.4094, -0.0532]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0725],
         [ 0.0322],
         [ 0.0206],
         [-0.0130],
         [ 0.0296],
         [ 0.1346],
         [ 0.0602],
         [-0.0167],
         [-0.0206],
         [ 0.0641]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2876], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2876], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 0

validation_step
qid:  5a85fb5e5542994775f606df
para_indexes:  tensor([ 18,  98, 143, 280, 307, 385, 483, 565], device='cuda:0')
sp_para_output:  tensor([[[ 0.2182,  0.0138,  0.2421,  ...,  0.0045, -0.0440,  0.0295],
         [ 0.0202, -0.0627,  0.1156,  ...,  0.0121, -0.1421, -0.0578],
         [-0.0020, -0.1784,  0.1841,  ...,  0.3702, -0.3730, -0.1846],
         ...,
         [ 0.1182, -0.0667,  0.0843,  ...,  0.2946, -0.1044, -0.2282],
         [ 0.0611,  0.0449,  0.0835,  ...,  0.2534, -0.2813, -0.1428],
         [ 0.0855, -0.0469,  0.1120,  ...,  0.5683, -0.2191, -0.0332]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0046],
         [ 0.0489],
         [-0.0171],
         [-0.0372],
         [-0.0370],
         [-0.0748],
         [ 0.0717],
         [ 0.0405]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2979], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_scor

validation_step
qid:  5ab5c263554299488d4d9a18
para_indexes:  tensor([ 27,  69, 143, 221, 308, 394, 474, 517, 594, 680], device='cuda:0')
sp_para_output:  tensor([[[ 0.0585,  0.4083, -0.1211,  ..., -0.0997,  0.0502,  0.1175],
         [ 0.1213, -0.2000,  0.2598,  ..., -0.2041, -0.0013,  0.1252],
         [ 0.2270, -0.3886,  0.1075,  ...,  0.2362, -0.1168,  0.0442],
         ...,
         [ 0.1611, -0.3969,  0.2075,  ...,  0.2506,  0.1484, -0.0611],
         [ 0.0266, -0.3000,  0.2584,  ...,  0.1317, -0.0585,  0.2240],
         [-0.0896, -0.2669,  0.3114,  ..., -0.0772,  0.0528,  0.3489]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0093],
         [ 0.0835],
         [ 0.0792],
         [ 0.0669],
         [ 0.0419],
         [ 0.0310],
         [ 0.0739],
         [ 0.0498],
         [ 0.0956],
         [ 0.0478]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2205], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5a7137dc5542994082a3e68a
para_indexes:  tensor([ 21,  45,  77, 139, 164, 273, 306, 366, 400, 429], device='cuda:0')
sp_para_output:  tensor([[[-0.0341, -0.2856,  0.1258,  ..., -0.0962, -0.2027, -0.0285],
         [-0.2504, -0.3017,  0.1064,  ..., -0.2738, -0.2125,  0.2987],
         [-0.2165, -0.2255, -0.0098,  ..., -0.3041, -0.2510,  0.2189],
         ...,
         [-0.0136, -0.2905,  0.0591,  ..., -0.6026, -0.2296, -0.2361],
         [-0.1110, -0.1925,  0.0646,  ..., -0.1808, -0.3213,  0.0997],
         [-0.2097, -0.2957,  0.0557,  ..., -0.3056, -0.2971,  0.2789]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0829],
         [0.1277],
         [0.1042],
         [0.0902],
         [0.0635],
         [0.0326],
         [0.0258],
         [0.0150],
         [0.0341],
         [0.0931]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3677], device='cuda:0', dtype=torch.float16), 'start_logit': -1000

validation_step
qid:  5a8ba0855542995d1e6f1427
para_indexes:  tensor([  17,  181,  230,  300,  389,  551,  722,  816,  916, 1020],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.3022, -0.0086, -0.0675,  ..., -0.2936,  0.0509,  0.2699],
         [ 0.1679, -0.0515,  0.0518,  ..., -0.4537,  0.1465,  0.2876],
         [ 0.1100, -0.1022,  0.1568,  ..., -0.5599, -0.1904,  0.0186],
         ...,
         [ 0.1108,  0.0244,  0.0698,  ..., -0.2305, -0.1944, -0.2319],
         [ 0.1453,  0.2277,  0.0358,  ..., -0.6297,  0.0118,  0.2077],
         [ 0.2325,  0.0618, -0.0578,  ..., -0.4367, -0.1643,  0.0907]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0446],
         [-0.0094],
         [-0.0307],
         [-0.0611],
         [-0.0604],
         [-0.0216],
         [ 0.0463],
         [-0.0655],
         [-0.0228],
         [ 0.0726]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3994], device='cuda:0', dtype=torch.flo

s_to_p_map:  [0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 9]
para_sent_logits: tensor([0.0479, 0.0795, 0.0340, 0.0635, 0.0693, 0.1077], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([0.4019], device='cuda:0')
para_sents_offset: [0, 6]
evidence_candidates: {0: tensor([5], device='cuda:0')}
para_sent_logits: tensor([0.0359, 0.0740], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.4019, 0.1099], device='cuda:0')
para_sents_offset: [0, 6, 8]
evidence_candidates: {0: tensor([5], device='cuda:0'), 1: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.1000, 0.0240, 0.0538], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.4019, 0.1099, 0.1777], device='cuda:0')
para_sents_offset: [0, 6, 8, 11]
evidence_candidates: {0: tensor([5], device='cuda:0'), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0'

sp_para_output:  tensor([[[-0.0620,  0.0344,  0.0892,  ..., -0.3095, -0.0186,  0.0270],
         [-0.0755, -0.2043, -0.0269,  ..., -0.2890,  0.0845, -0.0602],
         [-0.1980,  0.0366,  0.0137,  ..., -0.4046,  0.3370,  0.3681],
         ...,
         [-0.1742, -0.0566,  0.1112,  ..., -0.4019,  0.0352, -0.0894],
         [-0.2721,  0.0488,  0.1081,  ..., -0.1150, -0.0416,  0.0535],
         [-0.0016,  0.0883, -0.2070,  ..., -0.0523,  0.1363,  0.4424]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0410],
         [ 0.0490],
         [ 0.0101],
         [-0.0184],
         [ 0.0363],
         [ 0.0743],
         [ 0.0292],
         [ 0.0415],
         [-0.0101],
         [ 0.0223]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3318], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3318], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 2, 2

para_sent_logits: tensor([-0.0031, -0.0225], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0245,  0.0914,  0.1337,  0.0261,  0.0278,  0.1911, -0.0257],
       device='cuda:0')
para_sents_offset: [0, 1, 2, 5, 6, 8, 11, 13]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([0], device='cuda:0'), 6: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0148, -0.0254], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0245,  0.0914,  0.1337,  0.0261,  0.0278,  0.1911, -0.0257, -0.0402],
       device='cuda:0')
para_sents_offset: [0, 1, 2, 5, 6, 8, 11, 13, 15]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tenso

sp_para_output:  tensor([[[-0.0736, -0.2194,  0.1096,  ...,  0.0543,  0.0257,  0.2153],
         [-0.1551,  0.2288,  0.0043,  ..., -0.0579, -0.0669, -0.1148],
         [-0.0452, -0.0842, -0.0445,  ..., -0.0279,  0.1083,  0.1767],
         ...,
         [-0.1254, -0.1075,  0.0278,  ...,  0.1117,  0.0296,  0.0310],
         [-0.3318, -0.2581, -0.1740,  ...,  0.4615,  0.0252, -0.0363],
         [-0.2423, -0.0968, -0.0898,  ..., -0.0817,  0.0605, -0.0409]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0471],
         [ 0.0199],
         [-0.0029],
         [-0.0241],
         [ 0.0206],
         [ 0.0673],
         [ 0.0453],
         [ 0.0651],
         [-0.0002],
         [-0.0128]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3206], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3206], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1

       device='cuda:0')
para_sents_offset: [0, 1, 4, 7, 9, 10, 13, 15]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([1], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0323], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0065,  0.1658,  0.1501,  0.0548, -0.0157,  0.0479, -0.0512,  0.0323],
       device='cuda:0')
para_sents_offset: [0, 1, 4, 7, 9, 10, 13, 15, 16]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([1], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tens

para_sent_logits: tensor([0.1192, 0.0067, 0.0536], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1545, 0.2492, 0.1795], device='cuda:0')
para_sents_offset: [0, 2, 6, 9]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([0], device='cuda:0')}
para_sent_logits: tensor([0.0540], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1545, 0.2492, 0.1795, 0.0540], device='cuda:0')
para_sents_offset: [0, 2, 6, 9, 10]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([0], device='cuda:0'), 3: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0789, -0.0098,  0.0260], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1545, 0.2492, 0.1795, 0.0540, 0.0951], device='cuda:0')
para_sents_offset: [0, 2, 6, 9, 10, 13]
evidence_candidates: {0: tensor([

sp_para_output:  tensor([[[ 0.2532, -0.3164,  0.0324,  ..., -0.4635,  0.0628,  0.0846],
         [-0.0587, -0.3889, -0.0253,  ..., -0.6034, -0.3564, -0.0414],
         [ 0.0413, -0.1653, -0.0144,  ..., -1.0365, -0.2704, -0.1374],
         ...,
         [ 0.0022, -0.1691,  0.0094,  ..., -0.8354, -0.3218, -0.3404],
         [ 0.2190, -0.2840,  0.0283,  ..., -0.7835, -0.2057,  0.1579],
         [-0.0619, -0.2722,  0.0168,  ..., -0.9395, -0.3344, -0.0890]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0179],
         [ 0.0049],
         [ 0.0307],
         [ 0.0473],
         [-0.0187],
         [-0.0690],
         [-0.0114],
         [ 0.0031],
         [-0.0521],
         [ 0.0127]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3901], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3901], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 2, 3, 3

validation_step
qid:  5ae237ac554299492dc91be1
para_indexes:  tensor([  19,   98,  217,  416,  516,  604,  694,  833,  963, 1065],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.1179,  0.0893,  0.0868,  ..., -0.1721, -0.0995,  0.2181],
         [-0.1086,  0.5205,  0.1141,  ..., -0.2086, -0.0896, -0.2398],
         [-0.2108,  0.0689,  0.0977,  ..., -0.3961, -0.1368, -0.0628],
         ...,
         [-0.0214,  0.0109, -0.0383,  ..., -0.6090, -0.1174, -0.0563],
         [ 0.0370,  0.1028,  0.0648,  ...,  0.2352, -0.2819,  0.3958],
         [-0.0449,  0.1318, -0.0832,  ..., -0.2142,  0.0039,  0.0392]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0026],
         [-0.0769],
         [ 0.0792],
         [-0.0116],
         [ 0.0028],
         [ 0.0793],
         [-0.0022],
         [ 0.0440],
         [ 0.0701],
         [ 0.0330]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3811], device='cuda:0', dtype=torch.flo

sp_para_output:  tensor([[[ 0.0154,  0.1297,  0.0204,  ..., -0.0926, -0.3338,  0.1293],
         [-0.2848,  0.3243, -0.0311,  ..., -0.2296,  0.2214,  0.1220],
         [-0.2483,  0.0284,  0.0506,  ..., -0.0693, -0.1415, -0.2772],
         ...,
         [-0.0211, -0.1867,  0.1001,  ..., -0.5791, -0.1408,  0.1687],
         [ 0.0288,  0.1125, -0.0058,  ..., -0.0988, -0.2755, -0.0193],
         [-0.2348,  0.0344, -0.0471,  ..., -0.2984, -0.2513,  0.1623]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0036],
         [ 0.0152],
         [-0.0516],
         [ 0.0556],
         [-0.0321],
         [-0.0526],
         [-0.0278],
         [-0.0110],
         [ 0.0098],
         [-0.0177]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3330], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3330], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 1

validation_step
qid:  5add2dae5542992c1e3a2558
para_indexes:  tensor([ 19,  83, 123, 184, 244, 324, 430, 488, 555, 640], device='cuda:0')
sp_para_output:  tensor([[[-0.0406, -0.2694, -0.0061,  ..., -0.8562, -0.2782, -0.0807],
         [-0.1513, -0.3106,  0.1433,  ..., -0.6298, -0.0905,  0.0792],
         [-0.0255, -0.3146,  0.0583,  ..., -0.2148, -0.3880, -0.1926],
         ...,
         [-0.0080, -0.2919, -0.0100,  ..., -0.3911, -0.3700, -0.1660],
         [-0.0736, -0.4340,  0.1756,  ..., -0.3302,  0.0690,  0.1423],
         [-0.0089, -0.2784,  0.0322,  ..., -0.2606, -0.3177,  0.0185]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.1306],
         [0.1764],
         [0.0895],
         [0.0651],
         [0.0247],
         [0.0385],
         [0.0689],
         [0.0817],
         [0.0807],
         [0.1289]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3110], device='cuda:0', dtype=torch.float16), 'start_logit': -1000

sp_para_output:  tensor([[[ 0.3482, -0.2170,  0.1545,  ..., -0.6615, -0.2893, -0.0894],
         [ 0.1492, -0.0820,  0.1295,  ..., -0.7450, -0.4295, -0.0936],
         [ 0.1603, -0.0407,  0.1952,  ..., -0.7909, -0.2797, -0.2393],
         ...,
         [ 0.0577,  0.0570,  0.1060,  ..., -0.8914, -0.0344, -0.1221],
         [ 0.2964, -0.0064,  0.0072,  ..., -0.6173, -0.3756, -0.1197],
         [-0.1108,  0.2845,  0.0093,  ..., -0.6810, -0.2463,  0.1488]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0006],
         [ 0.0244],
         [ 0.0909],
         [ 0.0399],
         [-0.0397],
         [-0.0175],
         [-0.0679],
         [ 0.0326],
         [ 0.0596],
         [ 0.0822]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.4219], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.4219], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 2, 3, 3

validation_step
qid:  5abe48675542991f66106119
para_indexes:  tensor([ 25, 151, 181, 235, 282, 329, 419, 459, 507, 592], device='cuda:0')
sp_para_output:  tensor([[[-0.2646, -0.0290, -0.1081,  ..., -0.0401,  0.2615,  0.1419],
         [-0.1714, -0.0625,  0.1219,  ..., -0.3153, -0.0316,  0.4142],
         [-0.3554, -0.1992,  0.1142,  ..., -0.2657, -0.0627,  0.2814],
         ...,
         [-0.3647, -0.1296,  0.2887,  ..., -0.2786, -0.1197,  0.2481],
         [-0.1683, -0.2350,  0.1719,  ..., -0.0242, -0.2136,  0.0101],
         [-0.0046, -0.0832,  0.2786,  ..., -0.3396,  0.1339,  0.1423]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0404],
         [-0.0264],
         [ 0.0316],
         [-0.0271],
         [-0.0093],
         [ 0.0014],
         [ 0.0339],
         [ 0.0432],
         [ 0.0454],
         [ 0.0986]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3406], device='cuda:0', dtype=torch.float16), 'start_log

       device='cuda:0')
para_sents_offset: [0, 5, 7, 9, 12, 14, 16, 18]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0944, 0.0319], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.1642,  0.0223,  0.0825,  0.0609, -0.0860,  0.0015,  0.0662,  0.1263],
       device='cuda:0')
para_sents_offset: [0, 5, 7, 9, 12, 14, 16, 18, 20]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([]

sp_para_output:  tensor([[[ 0.2776,  0.0684, -0.0684,  ..., -0.6911, -0.3843,  0.1229],
         [ 0.1143,  0.1078, -0.0131,  ..., -0.6498, -0.1258,  0.0055],
         [ 0.0924,  0.4486,  0.0944,  ..., -0.2264, -0.1662,  0.1210],
         ...,
         [ 0.1817,  0.0465, -0.0230,  ..., -0.4341, -0.0492,  0.1629],
         [ 0.0529,  0.3027,  0.1258,  ..., -0.4760, -0.1975,  0.1933],
         [ 0.1381,  0.1564,  0.1747,  ..., -0.2892, -0.0195,  0.1093]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0491],
         [ 0.0446],
         [-0.0286],
         [ 0.0352],
         [ 0.0851],
         [ 0.1020],
         [ 0.0834],
         [ 0.0028],
         [ 0.0925],
         [ 0.1059]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3093], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3093], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 0

validation_step
qid:  5ab852c05542990e739ec8c5
para_indexes:  tensor([ 20, 109, 157, 234, 292, 363, 452, 508, 651, 678], device='cuda:0')
sp_para_output:  tensor([[[ 0.0153,  0.0209,  0.1430,  ..., -0.2716,  0.0399, -0.1267],
         [ 0.0425,  0.0909, -0.1948,  ..., -0.4955, -0.2217,  0.0124],
         [-0.2442,  0.1656,  0.0529,  ..., -0.0154, -0.1566,  0.0212],
         ...,
         [-0.0696,  0.0962,  0.0306,  ..., -0.2020, -0.1142,  0.0178],
         [-0.1064, -0.0368, -0.0710,  ..., -0.4550, -0.3579, -0.2082],
         [-0.0990, -0.0325,  0.0804,  ..., -0.7170, -0.4100, -0.2311]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0135],
         [ 0.0862],
         [-0.0245],
         [ 0.0088],
         [-0.0240],
         [ 0.0143],
         [ 0.0778],
         [ 0.0799],
         [ 0.0158],
         [ 0.0009]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3452], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5a7471f655429979e2882955
para_indexes:  tensor([ 19,  93, 213, 313, 425, 480, 506, 549, 624, 711], device='cuda:0')
sp_para_output:  tensor([[[ 0.2206,  0.0517,  0.2061,  ..., -0.5428, -0.3511,  0.2427],
         [ 0.0121,  0.0599, -0.0140,  ..., -0.7034, -0.2944,  0.1278],
         [-0.0178, -0.0181,  0.1813,  ..., -0.6606, -0.2476,  0.1690],
         ...,
         [-0.1619, -0.1024,  0.0129,  ..., -0.5623,  0.5633,  0.3472],
         [ 0.0038, -0.1249,  0.1015,  ..., -0.7279,  0.0564,  0.0314],
         [-0.1137, -0.0652, -0.0423,  ..., -0.1900, -0.1860,  0.4241]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0123],
         [ 0.1069],
         [ 0.1023],
         [-0.0142],
         [ 0.0079],
         [ 0.0489],
         [ 0.0568],
         [ 0.0262],
         [ 0.0539],
         [ 0.0598]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3179], device='cuda:0', dtype=torch.float16), 'start_log

evidence_candidates: {0: tensor([0], device='cuda:0'), 1: tensor([0], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0845, 0.0602], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.1404,  0.4326, -0.0034,  0.0990,  0.0630,  0.2566,  0.0362,  0.1447],
       device='cuda:0')
para_sents_offset: [0, 3, 8, 9, 12, 14, 20, 22, 24]
evidence_candidates: {0: tensor([0], device='cuda:0'), 1: tensor([0], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64)}
par

s_to_p_map:  [0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 7, 7, 8, 8, 9, 9]
para_sent_logits: tensor([0.0303], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0303], device='cuda:0')
para_sents_offset: [0, 1]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0002, -0.0054], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0303, -0.0051], device='cuda:0')
para_sents_offset: [0, 1, 3]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0737,  0.0237, -0.0007,  0.0078], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0303, -0.0051,  0.1045], device='cuda:0')
para_sents_offset: [0, 1, 3, 7]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64)}


sp_para_output:  tensor([[[ 1.1186e-01, -1.8183e-01,  7.1530e-02,  ..., -4.3837e-01,
          -1.5187e-01, -1.6303e-02],
         [-2.4291e-01, -9.0968e-02, -2.7895e-02,  ..., -4.8233e-01,
          -5.9568e-02, -2.0514e-01],
         [-4.1463e-01, -9.4313e-02,  2.1724e-03,  ..., -5.8808e-01,
           2.2939e-02,  1.4593e-01],
         ...,
         [-2.3937e-01, -6.3751e-02,  1.0975e-01,  ..., -5.3081e-01,
           2.0067e-01, -5.7724e-02],
         [-5.7771e-02, -1.2019e-01, -5.5298e-02,  ..., -3.6413e-01,
           1.6321e-04,  1.1335e-01],
         [ 1.0362e-01, -1.0084e-01,  6.2851e-02,  ..., -7.7906e-01,
           1.4873e-01, -1.3035e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[-0.0098],
         [ 0.0249],
         [ 0.0258],
         [-0.0141],
         [ 0.0215],
         [ 0.0384],
         [ 0.0531],
         [ 0.0626],
         [ 0.0266],
         [ 0.0103]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor(

validation_step
qid:  5ade949b5542997c77adee63
para_indexes:  tensor([ 22,  61, 107, 137, 179, 219, 264, 318, 356, 456], device='cuda:0')
sp_para_output:  tensor([[[ 6.5592e-02, -1.2404e-01,  7.3506e-03,  ..., -1.2831e-01,
          -2.6207e-01, -2.1872e-01],
         [-1.7142e-01, -3.4678e-04,  2.7658e-03,  ..., -6.9592e-02,
          -5.3092e-02,  1.3993e-02],
         [-6.2466e-02, -4.3824e-02, -1.0520e-01,  ..., -1.9956e-01,
          -6.6256e-02,  1.4531e-01],
         ...,
         [-2.8205e-01,  5.3137e-02,  7.5431e-02,  ..., -4.0168e-01,
           6.0108e-02,  3.3849e-02],
         [-2.6858e-01, -2.2150e-02,  1.6206e-01,  ..., -4.7584e-01,
           1.2626e-01, -2.2718e-01],
         [-1.5681e-01, -4.4805e-02,  2.7744e-02,  ..., -1.9538e-01,
          -1.6105e-02,  1.2520e-01]]], device='cuda:0')
sp_para_output_t:  tensor([[[0.0685],
         [0.1082],
         [0.0982],
         [0.0530],
         [0.0576],
         [0.0793],
         [0.0640],
         [0.0870],
         [0

sp_para_output:  tensor([[[-0.1480,  0.4852, -0.0366,  ..., -0.2822,  0.1094,  0.0928],
         [-0.0194,  0.2141, -0.0217,  ..., -0.5915, -0.0729,  0.0288],
         [-0.2489,  0.3020,  0.0209,  ..., -0.3760,  0.2339, -0.1181],
         ...,
         [-0.1013,  0.0374,  0.0591,  ..., -0.2219, -0.0015, -0.1430],
         [ 0.0289, -0.0584,  0.0192,  ..., -0.4852, -0.1630,  0.0874],
         [-0.1874,  0.0995, -0.1188,  ..., -0.5391,  0.0186, -0.0163]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0099],
         [ 0.0350],
         [ 0.0043],
         [ 0.0199],
         [ 0.0013],
         [ 0.0054],
         [-0.0135],
         [ 0.0580],
         [ 0.0461],
         [ 0.0416]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3184], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3184], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 0

para_sent_logits: tensor([0.0751, 0.0768, 0.0049], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.1000,  0.2065,  0.0166,  0.0928, -0.0328,  0.0766,  0.0158, -0.0974,
         0.1568], device='cuda:0')
para_sents_offset: [0, 3, 7, 8, 12, 15, 20, 22, 25, 28]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([1], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0726,  0.1469], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.1000,  0.2065,  0.0166,  0.0928, -0.0328,  0.0766,  0.0158, -0.0974,
         0.1568,  0.0743], device='cuda:0')
para_sents_offset: [0, 3, 

sp_para_output:  tensor([[[ 0.1505,  0.1673,  0.1129,  ..., -0.6751, -0.1278,  0.2433],
         [ 0.0806, -0.0073,  0.0221,  ..., -0.2086, -0.1458, -0.3115],
         [-0.0101,  0.2044,  0.0247,  ..., -0.4245, -0.4354, -0.0254],
         ...,
         [-0.0484,  0.3234,  0.2704,  ..., -0.5244, -0.1860,  0.0809],
         [ 0.1477,  0.2708, -0.0268,  ..., -0.9747, -0.0223,  0.1873],
         [-0.0457,  0.1965, -0.0611,  ..., -0.5439, -0.2535, -0.0340]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0931],
         [ 0.0325],
         [ 0.0031],
         [-0.0061],
         [ 0.0198],
         [-0.0006],
         [ 0.0275],
         [ 0.1047],
         [ 0.0252],
         [ 0.0187]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2500], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2500], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 1

para_indexes:  tensor([ 27,  50, 101, 145, 175, 199, 313, 362, 397, 425], device='cuda:0')
sp_para_output:  tensor([[[-0.3265, -0.0651,  0.0715,  ...,  0.2656, -0.1291,  0.0191],
         [-0.3466,  0.0019, -0.1389,  ..., -0.1322, -0.0233,  0.4895],
         [-0.4673, -0.0414,  0.0278,  ...,  0.0053, -0.0374,  0.1710],
         ...,
         [-0.3394, -0.0025,  0.0712,  ...,  0.4216,  0.0268, -0.0517],
         [-0.3708,  0.0010,  0.0446,  ...,  0.5171, -0.0901,  0.0802],
         [-0.2548,  0.1913,  0.1035,  ...,  0.1926, -0.2112,  0.2740]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0786],
         [ 0.0632],
         [ 0.1080],
         [-0.0029],
         [ 0.0156],
         [ 0.0459],
         [-0.0447],
         [-0.0162],
         [-0.0099],
         [ 0.0385]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3533], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_s

sp_para_output:  tensor([[[-0.1557, -0.2779,  0.0817,  ..., -0.2115, -0.1542,  0.1796],
         [-0.3116, -0.1954,  0.1328,  ..., -0.3100,  0.1873,  0.2831],
         [-0.4930,  0.0347,  0.2445,  ..., -0.2669, -0.0121,  0.0656],
         [-0.1391,  0.0812,  0.0486,  ..., -0.6144,  0.1999,  0.3197]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0672],
         [0.0782],
         [0.0765],
         [0.0330]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3442], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3442], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 1, 2, 2, 3, 3]
para_sent_logits: tensor([0.0386, 0.0690], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1076], device='cuda:0')
para_sents_offset: [0, 2]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0

sp_para_output:  tensor([[[ 2.0617e-01, -8.7434e-02,  3.2890e-01,  ..., -3.2528e-02,
           4.9586e-02,  1.3009e-01],
         [ 4.9808e-02, -7.6197e-02,  2.1740e-01,  ..., -2.8948e-01,
          -1.9579e-01, -1.0769e-01],
         [-1.3290e-01, -2.6449e-02, -3.8799e-03,  ..., -5.1988e-01,
           5.1076e-03, -1.2302e-01],
         ...,
         [ 2.0341e-01, -2.0220e-01,  1.3306e-01,  ..., -2.3308e-01,
           4.0509e-02, -2.2439e-01],
         [-1.3769e-01, -4.6833e-04,  1.5551e-01,  ...,  5.4803e-02,
          -2.6013e-01, -2.9077e-01],
         [-7.0412e-02,  1.6672e-02,  2.1379e-01,  ..., -2.2050e-01,
          -1.6146e-01, -6.1390e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0602],
         [-0.0228],
         [-0.0309],
         [-0.0112],
         [-0.0402],
         [ 0.0452],
         [ 0.0150],
         [ 0.0209],
         [-0.0423],
         [-0.0802]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor(

sp_para_output:  tensor([[[ 0.0765, -0.1178,  0.2161,  ..., -0.1639,  0.1255,  0.0508],
         [-0.4001, -0.2791,  0.0751,  ...,  0.3512, -0.0470,  0.4348],
         [-0.4305, -0.1205,  0.0752,  ...,  0.2623,  0.2059,  0.0803],
         ...,
         [-0.3024, -0.0601,  0.0887,  ..., -0.6161,  0.0468,  0.2798],
         [-0.1206, -0.0729, -0.0359,  ..., -0.0477,  0.1201, -0.0770],
         [-0.0334, -0.0941,  0.2632,  ..., -0.4808,  0.2991, -0.0663]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0005],
         [ 0.0711],
         [ 0.0746],
         [ 0.0455],
         [ 0.0315],
         [ 0.0618],
         [-0.0278],
         [-0.0313]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2996], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2996], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 4, 5, 6, 6, 6, 7]
para_

         [ 0.0248]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3269], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3269], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9]
para_sent_logits: tensor([-0.0252,  0.0363], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0111], device='cuda:0')
para_sents_offset: [0, 2]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0187, 0.0148], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0111, 0.0335], device='cuda:0')
para_sents_offset: [0, 2, 4]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.1033, 0.0784, 0.1031], dev

validation_step
qid:  5a901d6c5542995651fb50bd
para_indexes:  tensor([14], device='cuda:0')
sp_para_output:  tensor([[[ 5.7035e-04,  2.5774e-01,  8.4207e-02, -8.7653e-03,  1.3338e+00,
           4.5762e-01, -1.6997e-02, -1.3469e-01,  4.6000e-01, -4.3256e-01,
           3.2366e-01,  3.8662e-01, -2.7092e-01, -1.5192e-01,  2.3604e-03,
           6.9642e-02,  2.6813e-01,  5.5878e-01, -3.9927e-01,  7.8443e-01,
          -7.0233e-02, -2.7916e-01,  4.0855e-01,  1.1290e-01,  5.4582e-01,
           4.4181e-02, -3.9397e-02,  6.0793e-02,  2.1891e-01, -6.7233e-01,
          -4.7639e-01,  3.1324e-01,  3.2258e-01, -3.2791e-01, -1.3926e-01,
           1.7500e-01, -2.3279e-01,  8.0358e-02, -5.1539e-01, -6.9041e-02,
           3.3757e-01,  2.8337e-01,  3.5600e-01, -5.6166e-03,  6.1796e-01,
           3.7017e-01,  3.3740e-01,  1.2666e-01,  1.9412e-01,  4.0231e-01,
           3.2552e-02, -1.9489e-01,  6.7823e-02, -2.3692e-01, -2.4789e-01,
           3.7694e-02,  3.0045e-01,  1.4381e-02, -3.4065e-01, -1.8

validation_step
qid:  5ae16ea85542990adbacf790
para_indexes:  tensor([ 20, 126, 215, 319, 503, 557, 667, 751, 827, 919], device='cuda:0')
sp_para_output:  tensor([[[ 0.3274, -0.3398,  0.2825,  ..., -0.2291, -0.0046, -0.0283],
         [ 0.1481, -0.5250,  0.0643,  ..., -0.2296, -0.0377,  0.1575],
         [-0.0915, -0.3683,  0.1150,  ..., -0.4255, -0.0834, -0.0208],
         ...,
         [-0.0685, -0.4233,  0.3027,  ..., -0.2044, -0.0691, -0.0385],
         [-0.0912,  0.0691,  0.3804,  ..., -0.5918, -0.1934, -0.0732],
         [-0.0825, -0.4404,  0.0848,  ..., -0.6233, -0.1768,  0.0660]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0107],
         [-0.0125],
         [-0.0112],
         [-0.0869],
         [ 0.0265],
         [ 0.0197],
         [-0.0673],
         [-0.0667],
         [-0.0828],
         [-0.0105]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3752], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[-0.3024,  0.1313, -0.0958,  ..., -0.3302, -0.1082, -0.0047],
         [ 0.0826, -0.2986, -0.0033,  ..., -0.1302, -0.3962,  0.1153],
         [ 0.0192,  0.1415, -0.0578,  ...,  0.0110, -0.3287, -0.0820],
         ...,
         [-0.1422, -0.0214, -0.0328,  ..., -0.0010, -0.4239,  0.4663],
         [ 0.0215,  0.3470, -0.0897,  ..., -0.3296, -0.4273, -0.0174],
         [-0.1663,  0.4794, -0.1984,  ..., -0.2323, -0.3329, -0.2180]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0636],
         [ 0.0696],
         [ 0.0027],
         [-0.0182],
         [ 0.0015],
         [ 0.0611],
         [ 0.0460],
         [ 0.1281],
         [-0.0071],
         [ 0.0305]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2690], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2690], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 2, 3

validation_step
qid:  5abbfbcf55429965836003cf
para_indexes:  tensor([  15,  164,  272,  338,  458,  521,  630,  793,  938, 1000],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.2173, -0.0173,  0.1581,  ..., -0.4911, -0.0926, -0.0939],
         [-0.0038, -0.0840,  0.0791,  ..., -0.6618, -0.0118,  0.1419],
         [ 0.0327, -0.0959,  0.0607,  ..., -0.6473,  0.1246,  0.0914],
         ...,
         [ 0.0677,  0.1656, -0.0575,  ..., -0.0848,  0.4224, -0.1145],
         [ 0.1688, -0.0065,  0.1136,  ..., -0.5337,  0.0181, -0.0554],
         [ 0.2146,  0.0214,  0.0923,  ..., -0.3959, -0.1481, -0.0829]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0447],
         [-0.0228],
         [-0.0759],
         [-0.0029],
         [ 0.0413],
         [-0.0041],
         [ 0.0453],
         [-0.0688],
         [-0.0450],
         [ 0.0252]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3499], device='cuda:0', dtype=torch.flo

sp_para_output:  tensor([[[ 0.1242, -0.2239,  0.2427,  ..., -0.0963, -0.1892,  0.0706],
         [-0.2987, -0.2495,  0.0489,  ...,  0.0871, -0.2510,  0.2028],
         [-0.1970, -0.1754, -0.0412,  ..., -0.3104, -0.3049,  0.1193],
         [-0.2698, -0.3197,  0.0473,  ..., -0.1061, -0.4294, -0.0062],
         [-0.3668, -0.2016, -0.0751,  ...,  0.1834, -0.1565,  0.0412],
         [-0.5138, -0.4070, -0.1293,  ...,  0.4436, -0.3080,  0.0138]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0407],
         [ 0.0654],
         [ 0.1085],
         [ 0.0502],
         [ 0.0169],
         [-0.0182]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3276], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3276], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 2, 3, 4, 5]
para_sent_logits: tensor([-0.0015,  0.0864], device='cuda:0', dtype=torch.float16)
par

validation_step
qid:  5a86e6f65542994775f6077c
para_indexes:  tensor([ 24,  61, 133, 202, 264, 400, 489, 532, 586, 668], device='cuda:0')
sp_para_output:  tensor([[[-0.0107, -0.2474,  0.1770,  ..., -0.4220, -0.1936, -0.0884],
         [-0.0250, -0.1080, -0.2257,  ...,  0.3748,  0.0795, -0.0149],
         [-0.0461, -0.1723,  0.0071,  ..., -0.7055, -0.3447, -0.0397],
         ...,
         [-0.0357, -0.1738,  0.0319,  ..., -0.3641, -0.0802, -0.0397],
         [ 0.1849,  0.1583,  0.1281,  ..., -0.6225, -0.2066,  0.1268],
         [ 0.0493, -0.0573,  0.0735,  ..., -0.7097, -0.2723,  0.0981]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0314],
         [ 0.0253],
         [-0.0228],
         [ 0.0875],
         [-0.0476],
         [-0.0857],
         [ 0.0251],
         [-0.0203],
         [-0.0078],
         [-0.0473]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2261], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5a7d1d825542995ed0d165f5
para_indexes:  tensor([ 15,  72, 141, 233, 338, 439, 490, 568, 642, 699], device='cuda:0')
sp_para_output:  tensor([[[ 0.2250, -0.1380,  0.0929,  ..., -0.5386, -0.2154,  0.0549],
         [ 0.0581, -0.2086,  0.0708,  ..., -0.1751, -0.2124,  0.4240],
         [ 0.0798, -0.1250, -0.0768,  ..., -0.8102, -0.3605,  0.2005],
         ...,
         [ 0.0768, -0.3103,  0.0471,  ..., -0.5486, -0.0892,  0.1273],
         [ 0.2561, -0.2411, -0.0560,  ..., -0.5271, -0.2102,  0.0922],
         [-0.0182, -0.0665, -0.1811,  ..., -0.7841, -0.2581, -0.2288]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0578],
         [ 0.0577],
         [-0.0745],
         [ 0.0135],
         [ 0.0173],
         [ 0.0073],
         [ 0.0629],
         [ 0.0364],
         [ 0.0143],
         [ 0.0170]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3660], device='cuda:0', dtype=torch.float16), 'start_log

evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0613], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0523, -0.0775, -0.0806, -0.0314, -0.0374, -0.1564, -0.0613],
       device='cuda:0')
para_sents_offset: [0, 4, 5, 6, 9, 10, 13, 14]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0698], device='cuda:0', d

sp_para_output:  tensor([[[-0.0332,  0.4435, -0.2093,  ..., -0.5163,  0.3064,  0.1066],
         [ 0.2360, -0.0278, -0.1495,  ..., -0.5807, -0.0581,  0.1009],
         [-0.0236, -0.0690,  0.1700,  ..., -0.5344,  0.1105, -0.0602],
         ...,
         [-0.0480, -0.1876,  0.0099,  ..., -0.3918,  0.2473,  0.1494],
         [ 0.0847,  0.0021,  0.0126,  ..., -0.3186,  0.2074,  0.2006],
         [-0.0303,  0.1722,  0.1003,  ..., -0.1681, -0.0337, -0.0382]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0333],
         [ 0.0859],
         [-0.0239],
         [ 0.0443],
         [-0.1266],
         [-0.0195],
         [ 0.0406],
         [ 0.0811],
         [ 0.0210],
         [-0.0242]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2600], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2600], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 1, 1

validation_step
qid:  5a8b7d7d5542997f31a41d4b
para_indexes:  tensor([ 19,  46,  71, 197, 301, 326, 419, 484, 530, 630], device='cuda:0')
sp_para_output:  tensor([[[ 0.1275,  0.2569,  0.2177,  ..., -0.5090, -0.2464, -0.1655],
         [-0.0174,  0.2859, -0.0746,  ..., -0.0609, -0.3169,  0.0403],
         [ 0.1882, -0.2081,  0.3201,  ..., -0.6781, -0.0598,  0.2994],
         ...,
         [-0.1066, -0.0191, -0.0594,  ..., -0.4963, -0.0664,  0.0297],
         [-0.0009, -0.0109,  0.2168,  ..., -0.3435, -0.1681, -0.1812],
         [ 0.2788,  0.2677, -0.0759,  ..., -0.3015,  0.0499, -0.3011]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0462],
         [ 0.0308],
         [ 0.0626],
         [ 0.0402],
         [-0.0269],
         [ 0.0194],
         [ 0.0485],
         [ 0.0866],
         [ 0.0822],
         [ 0.1121]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3064], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5ab4071955429976abd1bd38
para_indexes:  tensor([ 12, 114, 298, 375, 445, 524, 603, 688, 781, 846], device='cuda:0')
sp_para_output:  tensor([[[ 0.2670,  0.0634,  0.1058,  ..., -0.1962, -0.3400,  0.2125],
         [ 0.3216,  0.0359, -0.1075,  ..., -0.1493, -0.2149,  0.0357],
         [ 0.0908, -0.0011, -0.3769,  ..., -0.7588, -0.1108,  0.0624],
         ...,
         [ 0.2317,  0.2819,  0.0479,  ..., -0.5286, -0.4619, -0.1642],
         [ 0.0937, -0.0301,  0.0981,  ..., -0.2249, -0.2053,  0.0377],
         [-0.2642,  0.4661, -0.1729,  ..., -0.6250, -0.1727, -0.0634]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0099],
         [-0.0061],
         [-0.0486],
         [-0.0539],
         [ 0.0605],
         [ 0.0226],
         [ 0.0597],
         [-0.0336],
         [-0.0144],
         [-0.0024]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.5552], device='cuda:0', dtype=torch.float16), 'start_log

para_sent_logits: tensor([ 0.0272, -0.0164], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0298, -0.1049,  0.0153,  0.0782,  0.0804, -0.0606,  0.0879,  0.0432,
         0.0108], device='cuda:0')
para_sents_offset: [0, 1, 3, 4, 6, 8, 10, 12, 16, 18]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0097], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0298, -0.1049,  0.0153,  0.0782,  0.0804, -0.0606,  0.0879,  0.0432,
         0.0108,  0.0097], device='cuda:0')
para_sents_offset: [0, 1, 

validation_step
qid:  5ab7af9d5542993667793fe9
para_indexes:  tensor([ 15,  55, 125, 167, 209, 469, 519, 576, 635], device='cuda:0')
sp_para_output:  tensor([[[ 0.0489, -0.0570, -0.0233,  ...,  0.0537, -0.2071,  0.0547],
         [-0.0864, -0.0536, -0.0205,  ..., -0.2049, -0.1523, -0.0737],
         [-0.0715,  0.0921, -0.1347,  ..., -0.1338, -0.3212, -0.2551],
         ...,
         [ 0.1645,  0.1285,  0.0572,  ..., -0.1219, -0.3929, -0.1056],
         [-0.1182,  0.0605,  0.0633,  ..., -0.3685, -0.3287,  0.0034],
         [-0.0756,  0.3514,  0.0922,  ..., -0.2099, -0.3666, -0.3774]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.1479],
         [0.1009],
         [0.1614],
         [0.0301],
         [0.1119],
         [0.0925],
         [0.1071],
         [0.1340],
         [0.1409]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3066], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -10000

para_sent_logits: tensor([0.0368, 0.1405, 0.0879], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1848, 0.0466, 0.3086, 0.2652], device='cuda:0')
para_sents_offset: [0, 2, 3, 7, 10]
evidence_candidates: {0: tensor([1], device='cuda:0'), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([0, 2], device='cuda:0'), 3: tensor([1], device='cuda:0')}
para_sent_logits: tensor([ 0.0647,  0.0279,  0.0837,  0.1472,  0.0194,  0.0648,  0.0082, -0.0434,
         0.0602], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1848, 0.0466, 0.3086, 0.2652, 0.4327], device='cuda:0')
para_sents_offset: [0, 2, 3, 7, 10, 19]
evidence_candidates: {0: tensor([1], device='cuda:0'), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([0, 2], device='cuda:0'), 3: tensor([1], device='cuda:0'), 4: tensor([3], device='cuda:0')}
para_sent_logits: tensor([0.0803, 0.0457], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1848, 0.0466, 0.3

sp_para_output:  tensor([[[ 0.2467, -0.0582, -0.1294,  ..., -0.7292, -0.0114, -0.0652],
         [ 0.0741,  0.0815, -0.3165,  ..., -0.6497, -0.1960, -0.3225],
         [ 0.1333,  0.0790, -0.1130,  ..., -0.7219, -0.0860, -0.0868],
         ...,
         [-0.3238, -0.2338, -0.2751,  ..., -0.1366,  0.1907,  0.1290],
         [-0.0203, -0.1611,  0.1848,  ..., -0.4505, -0.0583,  0.0290],
         [-0.0210,  0.2804,  0.0199,  ..., -0.3795,  0.4462,  0.2263]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0347],
         [ 0.1309],
         [ 0.0515],
         [ 0.0695],
         [ 0.0719],
         [ 0.1514],
         [ 0.0956],
         [ 0.0813],
         [-0.0089],
         [ 0.0024]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2988], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2988], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1

evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0084,  0.0179, -0.1028], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0438,  0.0298,  0.0808, -0.0933], device='cuda:0')
para_sents_offset: [0, 3, 5, 8, 11]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0992], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0438,  0.0298,  0.0808, -0.0933, -0.0992], device='cuda:0')
para_sents_offset: [0, 3, 5, 8, 11, 12]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: te

evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([2], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([1], device='cuda:0')}
para_sent_logits: tensor([0.0368, 0.0521], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0571,  0.2283,  0.0954,  0.1843,  0.0250, -0.1263,  0.1165,  0.1592,
         0.2134,  0.0890], device='cuda:0')
para_sents_offset: [0, 2, 5, 8, 11, 13, 17, 19, 23, 26, 28]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([2], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor

sp_para_output:  tensor([[[ 0.2609, -0.0159,  0.1843,  ..., -0.4694,  0.0392,  0.2086],
         [ 0.1658, -0.1912,  0.1875,  ..., -0.1799, -0.2543,  0.1724],
         [ 0.1463, -0.2722, -0.0656,  ..., -0.5874, -0.2095,  0.0315],
         ...,
         [-0.0166,  0.2818,  0.0069,  ..., -0.5283, -0.3151,  0.0444],
         [ 0.1247,  0.0628,  0.1504,  ..., -0.5217, -0.3115, -0.1289],
         [ 0.2270, -0.0989, -0.0504,  ..., -0.4730, -0.1605, -0.0889]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0285],
         [ 0.0030],
         [-0.0728],
         [-0.0561],
         [-0.0173],
         [ 0.0524],
         [-0.0307],
         [-0.0622],
         [-0.0369],
         [-0.0235]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3372], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3372], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 1

validation_step
qid:  5a865e8a55429960ec39b67a
para_indexes:  tensor([ 18,  86, 128, 201, 221, 268, 315, 340, 443], device='cuda:0')
sp_para_output:  tensor([[[ 0.2332, -0.1549,  0.0075,  ..., -0.1779,  0.0290,  0.0325],
         [ 0.0517, -0.1263, -0.0194,  ..., -0.2894, -0.1255,  0.0697],
         [ 0.2278, -0.2956, -0.0626,  ..., -0.3044, -0.0389,  0.2240],
         ...,
         [-0.4632,  0.0681, -0.0895,  ..., -0.1075, -0.0320,  0.0192],
         [ 0.0852, -0.1317, -0.0175,  ..., -0.3901,  0.1202,  0.1681],
         [ 0.0367, -0.1518, -0.0408,  ..., -0.2118,  0.0956, -0.1501]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0991],
         [0.1573],
         [0.0496],
         [0.1627],
         [0.1315],
         [0.0919],
         [0.1216],
         [0.1092],
         [0.0787]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3252], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -10000

       device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 7.8430e-02, -9.5299e-02, -1.0197e-01, -2.0151e-02, -1.3089e-04,
         3.2326e-02,  1.4219e-01], device='cuda:0')
para_sents_offset: [0, 4, 8, 11, 14, 15, 17, 24]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([6], device='cuda:0')}
para_sent_logits: tensor([-0.0856, -0.0511,  0.0316, -0.0685, -0.0300], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([ 7.8430e-02, -9.5299e-02, -1.0197e-01, -2.0151e-02, -1.3089e-04,
         3.2326e-02,  1.4219e-01, -2.0351e-01], device='cuda:0')
para_sents_offset: [0, 4, 8, 11, 14, 15, 17, 24, 29]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int

sp_para_output:  tensor([[[-0.0396,  0.1256, -0.1328,  ..., -0.4364,  0.1369,  0.1922],
         [ 0.1050,  0.0984, -0.0202,  ..., -0.6568, -0.4101,  0.3566],
         [ 0.0931, -0.1384, -0.1604,  ...,  0.2154, -0.1748,  0.0246],
         ...,
         [ 0.0823,  0.0800, -0.2031,  ..., -0.2996, -0.5190, -0.1615],
         [-0.1574,  0.4531, -0.1042,  ..., -0.7943, -0.2206,  0.0848],
         [-0.0130,  0.3127, -0.2367,  ..., -0.5852,  0.1200,  0.0490]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0350],
         [ 0.0141],
         [ 0.0344],
         [-0.0230],
         [ 0.0384],
         [ 0.0511],
         [ 0.0669],
         [ 0.0755],
         [ 0.0023],
         [-0.0104]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.4165], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.4165], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 2

validation_step
qid:  5ac3a47f554299391541386d
para_indexes:  tensor([ 28,  92, 128, 209, 240, 273, 369, 447, 504, 598], device='cuda:0')
sp_para_output:  tensor([[[-0.0301, -0.1572,  0.0747,  ..., -0.7004,  0.0526,  0.2431],
         [-0.2242, -0.1552,  0.0386,  ..., -0.3811,  0.0508,  0.0045],
         [-0.1394, -0.4140, -0.0080,  ..., -0.3960, -0.0119,  0.0251],
         ...,
         [-0.3036, -0.3892,  0.0759,  ..., -0.4981,  0.1004,  0.1605],
         [-0.0393, -0.2332, -0.0197,  ..., -0.5458,  0.0347, -0.0213],
         [-0.1133, -0.1600, -0.0234,  ..., -0.4690, -0.1397, -0.0805]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0933],
         [0.1443],
         [0.0963],
         [0.1256],
         [0.0635],
         [0.0378],
         [0.0959],
         [0.1354],
         [0.1035],
         [0.1375]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3586], device='cuda:0', dtype=torch.float16), 'start_logit': -1000

para_sent_logits: tensor([0.0668, 0.0049, 0.0158], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0964, 0.1882, 0.1655, 0.0875], device='cuda:0')
para_sents_offset: [0, 2, 5, 7, 10]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([0], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0380, -0.0051, -0.0535], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0964,  0.1882,  0.1655,  0.0875, -0.0207], device='cuda:0')
para_sents_offset: [0, 2, 5, 7, 10, 13]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([0], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0814], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([

sp_para_output:  tensor([[[ 0.0558, -0.2122,  0.1976,  ..., -0.3441, -0.0646, -0.0827],
         [ 0.0947, -0.2171, -0.0269,  ..., -0.4616,  0.0481,  0.1067],
         [-0.0670,  0.2173,  0.0528,  ..., -0.8733,  0.1214, -0.1631],
         ...,
         [-0.2259, -0.0215,  0.0356,  ..., -0.2588, -0.0028, -0.0740],
         [-0.0725,  0.0516, -0.1685,  ..., -0.4943, -0.3196, -0.2743],
         [-0.3139,  0.2679,  0.0301,  ..., -0.6891, -0.0028, -0.0390]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0594],
         [0.0721],
         [0.0689],
         [0.0443],
         [0.0942],
         [0.0759],
         [0.0588],
         [0.0606],
         [0.0667],
         [0.0085]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2688], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2688], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1, 1, 1, 2,

validation_step
qid:  5ae16d9655429920d52343ce
para_indexes:  tensor([ 25,  98, 127, 193, 248, 276, 386, 467, 567, 658], device='cuda:0')
sp_para_output:  tensor([[[-0.0584, -0.1226, -0.0435,  ..., -0.4615,  0.0924,  0.1237],
         [ 0.2025,  0.0053,  0.0055,  ..., -0.7909,  0.0735,  0.2851],
         [-0.0059, -0.1673, -0.0137,  ..., -0.6574, -0.0669,  0.0458],
         ...,
         [-0.0120,  0.1671,  0.0735,  ..., -0.3288, -0.1104, -0.0710],
         [ 0.1637, -0.0466,  0.0470,  ..., -0.7366,  0.0008,  0.2017],
         [ 0.3331,  0.1742, -0.0401,  ..., -0.6306, -0.1290,  0.2901]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0430],
         [ 0.0439],
         [-0.0090],
         [ 0.0260],
         [ 0.0106],
         [-0.0109],
         [-0.0561],
         [ 0.0990],
         [ 0.0558],
         [-0.0132]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3630], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 0.4293, -0.1495,  0.1869,  ..., -0.1958, -0.4430,  0.0310],
         [-0.0416, -0.1467, -0.0555,  ..., -0.7434, -0.4086,  0.0984],
         [ 0.2017,  0.0368, -0.1346,  ..., -0.5712, -0.2973,  0.0190],
         ...,
         [ 0.1948,  0.0933,  0.1482,  ..., -0.3592, -0.5569,  0.0775],
         [ 0.1815, -0.1864,  0.0762,  ..., -0.5562, -0.1197,  0.0342],
         [ 0.1089,  0.0128, -0.0674,  ..., -0.8169, -0.4754,  0.0807]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0228],
         [ 0.0672],
         [ 0.0396],
         [-0.0455],
         [ 0.0135],
         [ 0.0066],
         [-0.0527],
         [ 0.0467],
         [ 0.0093],
         [-0.0975]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3281], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3281], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 1

validation_step
qid:  5a89df265542992e4fca840d
para_indexes:  tensor([ 18,  59,  93, 144, 209, 291, 418, 454, 481, 586], device='cuda:0')
sp_para_output:  tensor([[[ 0.0365, -0.2328, -0.0057,  ..., -0.3599, -0.2218,  0.0213],
         [ 0.0869,  0.0306,  0.0817,  ..., -0.4942, -0.0561, -0.0872],
         [-0.2049,  0.0687, -0.1663,  ..., -0.4946,  0.0600,  0.0208],
         ...,
         [-0.3090, -0.0049,  0.0081,  ..., -0.0055, -0.1406,  0.1804],
         [-0.2103, -0.1628, -0.0668,  ...,  0.1523, -0.1803,  0.0125],
         [ 0.0528, -0.1260,  0.1560,  ..., -0.2033, -0.0970,  0.2115]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0817],
         [ 0.0380],
         [ 0.0427],
         [-0.0117],
         [ 0.0705],
         [-0.0017],
         [ 0.0976],
         [ 0.0888],
         [ 0.1060],
         [ 0.0889]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.4163], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 0.0971, -0.0481,  0.1469,  ...,  0.2688, -0.2065,  0.0978],
         [ 0.0277,  0.1880,  0.0685,  ..., -0.2360, -0.4758,  0.1152],
         [-0.0990, -0.0058, -0.0033,  ..., -0.5167, -0.0651,  0.3289],
         ...,
         [-0.2475,  0.5302,  0.0028,  ...,  0.0682, -0.1198,  0.0374],
         [-0.0541,  0.1805,  0.1448,  ..., -0.0059, -0.3830,  0.0118],
         [-0.1031,  0.3063,  0.0907,  ...,  0.1275, -0.0776,  0.1622]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0050],
         [ 0.0019],
         [-0.0186],
         [ 0.0197],
         [ 0.0720],
         [ 0.0766],
         [-0.0098],
         [ 0.0573],
         [-0.0420]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2764], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2764], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 2, 3, 3, 4, 5, 5, 6, 6, 

sp_para_output:  tensor([[[ 0.2073,  0.0452, -0.0097,  ..., -0.6805, -0.2374,  0.0670],
         [ 0.0094, -0.1413,  0.0158,  ..., -0.1198,  0.1531,  0.0959],
         [ 0.2341, -0.0463, -0.1644,  ..., -0.7187, -0.2929,  0.1153],
         ...,
         [ 0.1723, -0.1511, -0.1790,  ..., -0.3030, -0.2772, -0.2782],
         [ 0.0943, -0.0400, -0.2678,  ..., -0.3259, -0.0474, -0.1894],
         [-0.0583, -0.0753,  0.1038,  ..., -0.7823, -0.2513, -0.2251]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0307],
         [ 0.0808],
         [ 0.0254],
         [ 0.0889],
         [-0.0069],
         [ 0.0783],
         [ 0.0671],
         [ 0.0316],
         [ 0.0412],
         [ 0.1176]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3271], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3271], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 2

       dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0334, -0.0087,  0.0038,  0.0921, -0.1374, -0.0565,  0.2432,  0.1221,
        -0.0334], device='cuda:0')
para_sents_offset: [0, 7, 14, 19, 25, 35, 40, 46, 53, 58]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0696, -0.0706,  0.0246,  0.1401, -0.0417,  0.0399, -0.0450],
       device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0334, -0.0087,  0.0038,  0.0921, -0.1374, -0.0565,  0.2432,  0.1221,
        -0.0334,  0.1169], device='cuda:0')
para_sents_offs

validation_step
qid:  5adf0e7c5542992d7e9f9297
para_indexes:  tensor([  29,  140,  217,  334,  526,  697,  930, 1067, 1143, 1354],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.0217, -0.0753,  0.2448,  ..., -0.4504, -0.1992, -0.0242],
         [-0.1889, -0.1992,  0.1626,  ..., -0.3086, -0.2946, -0.2599],
         [-0.0753, -0.1727, -0.0122,  ..., -0.5606, -0.1707, -0.0011],
         ...,
         [ 0.1246,  0.0671,  0.0739,  ..., -0.6974, -0.0584,  0.1251],
         [ 0.1079, -0.2374, -0.0241,  ..., -0.5229, -0.4045, -0.2417],
         [-0.0356, -0.3553,  0.1622,  ..., -0.6984, -0.3676, -0.0254]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0060],
         [ 0.0256],
         [ 0.0507],
         [ 0.0966],
         [ 0.0169],
         [ 0.0211],
         [ 0.0095],
         [ 0.1055],
         [ 0.0406],
         [ 0.0255]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2788], device='cuda:0', dtype=torch.flo

       dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0344, -0.0937, -0.0701, -0.0138, -0.0444,  0.0983,  0.0199, -0.0517],
       device='cuda:0')
para_sents_offset: [0, 2, 4, 5, 6, 8, 10, 11, 15]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0275], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0344, -0.0937, -0.0701, -0.0138, -0.0444,  0.0983,  0.0199, -0.0517,
         0.0275], device='cuda:0')
para_sents_offset: [0, 2, 4, 5, 6, 8, 10, 11, 15, 16]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0

sp_para_output:  tensor([[[ 0.2229,  0.2925,  0.0685,  ..., -0.2150, -0.4605,  0.1097],
         [ 0.0752,  0.1946,  0.0895,  ..., -0.1538, -0.1711,  0.1135],
         [ 0.1779,  0.2689,  0.0343,  ..., -0.8149, -0.2849,  0.0599],
         ...,
         [ 0.0725,  0.1764,  0.0545,  ..., -0.5356, -0.0594,  0.1084],
         [-0.1231,  0.2865, -0.1299,  ..., -0.0403, -0.1317,  0.4195],
         [ 0.0168,  0.1983,  0.0209,  ..., -0.1754, -0.4388, -0.0618]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0491],
         [0.0443],
         [0.0178],
         [0.0704],
         [0.0007],
         [0.0556],
         [0.0373],
         [0.0237],
         [0.0286]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3789], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3789], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 

sp_para_output:  tensor([[[ 0.3662,  0.2017,  0.2045,  ..., -0.8308, -0.2650,  0.0838],
         [ 0.1568,  0.2454,  0.0039,  ..., -0.6823, -0.1256,  0.1053],
         [-0.0444, -0.0445,  0.0258,  ..., -0.9261, -0.3603, -0.1885],
         ...,
         [-0.0046,  0.2332,  0.0275,  ..., -0.7856, -0.0010, -0.1019],
         [ 0.1867,  0.1304, -0.0149,  ..., -0.9476, -0.1822,  0.0607],
         [ 0.1435,  0.2642, -0.0253,  ..., -0.8577, -0.0119,  0.2505]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0057],
         [ 0.0317],
         [ 0.0134],
         [-0.0163],
         [-0.0237],
         [ 0.0688],
         [ 0.0149],
         [-0.0101],
         [-0.0408],
         [ 0.0623]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3303], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3303], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 0

validation_step
qid:  5ab48bab5542991779162cd3
para_indexes:  tensor([ 25,  97, 197, 247, 329, 376, 426, 496, 562, 724], device='cuda:0')
sp_para_output:  tensor([[[-6.4642e-02, -1.2324e-01, -5.4094e-02,  ..., -1.0048e-01,
           3.8976e-02,  8.4441e-02],
         [ 1.2613e-01, -7.9058e-02,  5.6148e-02,  ..., -5.5526e-01,
          -2.5839e-01,  1.0092e-01],
         [-4.9238e-02,  2.4869e-01, -1.0541e-02,  ..., -3.4399e-04,
          -1.8374e-01, -6.5472e-02],
         ...,
         [-1.9320e-01, -2.2922e-01, -8.1917e-03,  ..., -2.8127e-01,
          -2.5503e-01,  1.7891e-02],
         [-1.4330e-01, -1.7118e-01,  1.0327e-01,  ...,  9.6771e-03,
          -3.2705e-01,  1.1805e-01],
         [ 1.1411e-01,  1.3181e-01,  1.1960e-01,  ..., -5.5400e-01,
          -3.9464e-01,  5.2703e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0446],
         [ 0.0913],
         [ 0.0486],
         [ 0.0300],
         [-0.0016],
         [-0.0719],
         [ 0.0209],
         [ 0.0267],
   

validation_step
qid:  5abdd0f15542991f6610604d
para_indexes:  tensor([ 21,  52, 120, 190, 287, 438, 489, 542, 604, 686], device='cuda:0')
sp_para_output:  tensor([[[-1.0839e-01, -3.3477e-01,  2.2546e-01,  ..., -3.1430e-01,
          -6.4102e-02,  2.4582e-01],
         [-1.4469e-01, -1.8778e-01,  2.5322e-01,  ..., -3.3992e-03,
          -2.2030e-01,  5.3608e-02],
         [-2.5463e-01,  1.1298e-01, -3.0261e-01,  ...,  1.6096e-01,
          -5.1144e-03, -1.2522e-01],
         ...,
         [-2.1070e-01, -9.0393e-02,  4.1965e-02,  ..., -5.3169e-01,
          -1.3865e-01, -3.3182e-01],
         [ 7.5615e-02,  1.3268e-01, -1.2663e-04,  ..., -2.2043e-01,
          -2.0354e-01,  1.3959e-01],
         [-1.4949e-01,  1.6815e-01, -3.1500e-03,  ..., -3.8880e-01,
          -1.6206e-01,  2.0148e-01]]], device='cuda:0')
sp_para_output_t:  tensor([[[-0.0599],
         [ 0.0352],
         [ 0.0017],
         [-0.0250],
         [-0.0986],
         [-0.0240],
         [-0.0124],
         [-0.0052],
   

sp_para_output:  tensor([[[ 0.1407,  0.0347,  0.0166,  ..., -0.5797, -0.1634, -0.0487],
         [ 0.1115,  0.1048, -0.0212,  ..., -0.9302,  0.2013,  0.2980],
         [-0.2694, -0.0704, -0.0445,  ..., -0.5949,  0.0184, -0.0359],
         ...,
         [-0.0357,  0.4227,  0.0321,  ..., -0.6154,  0.1164,  0.1906],
         [-0.4840, -0.2103, -0.1878,  ...,  0.1430, -0.0728, -0.3196],
         [-0.0959,  0.0093, -0.0763,  ..., -0.7598, -0.0826, -0.0581]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0499],
         [ 0.0408],
         [-0.0148],
         [ 0.0223],
         [ 0.0536],
         [ 0.0854],
         [ 0.0714],
         [ 0.0259],
         [ 0.0058],
         [ 0.0425]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3242], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3242], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1

validation_step
qid:  5ae1a7c8554299234fd042b0
para_indexes:  tensor([ 35, 145, 232, 315, 381, 431, 517, 736, 842, 960], device='cuda:0')
sp_para_output:  tensor([[[ 0.1155, -0.0805,  0.2455,  ..., -0.4795, -0.2134,  0.0203],
         [ 0.3507, -0.1564,  0.0789,  ..., -0.5271, -0.3465,  0.2347],
         [ 0.0955, -0.0476, -0.1916,  ..., -0.3682,  0.1630, -0.1080],
         ...,
         [-0.0746,  0.1683,  0.2134,  ..., -0.5621,  0.0353,  0.2630],
         [ 0.0409,  0.3065,  0.1397,  ..., -0.9024, -0.0516, -0.2119],
         [ 0.1886,  0.1002, -0.0665,  ..., -0.5025,  0.1104, -0.1660]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0517],
         [-0.0514],
         [ 0.0702],
         [ 0.0447],
         [ 0.0324],
         [-0.0037],
         [ 0.0761],
         [ 0.0240],
         [ 0.0984],
         [ 0.1008]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3445], device='cuda:0', dtype=torch.float16), 'start_log

para_sent_logits: tensor([ 0.0077, -0.0896, -0.1168], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0098, -0.0677,  0.2352, -0.0475,  0.0116, -0.1478, -0.1987],
       device='cuda:0')
para_sents_offset: [0, 1, 2, 7, 8, 9, 13, 16]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([0, 1], device='cuda:0'), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.1072, -0.0038,  0.0041], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0098, -0.0677,  0.2352, -0.0475,  0.0116, -0.1478, -0.1987, -0.1069],
       device='cuda:0')
para_sents_offset: [0, 1, 2, 7, 8, 9, 13, 16, 19]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=t

para_indexes:  tensor([ 30,  66, 110, 168, 227, 355, 448, 523, 562, 729], device='cuda:0')
sp_para_output:  tensor([[[ 0.0367, -0.0796,  0.0886,  ..., -0.3723, -0.1069,  0.0986],
         [ 0.0691, -0.0928,  0.0721,  ..., -0.4402, -0.2150,  0.0930],
         [ 0.1060, -0.2984,  0.0443,  ..., -0.2997, -0.2883, -0.0204],
         ...,
         [-0.0014, -0.1856,  0.0911,  ...,  0.1246, -0.1397,  0.1612],
         [ 0.1014, -0.1253,  0.0104,  ..., -0.3165, -0.3223,  0.2098],
         [-0.1156, -0.3042, -0.0617,  ..., -0.2273, -0.1125,  0.0242]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0298],
         [0.0754],
         [0.0242],
         [0.0763],
         [0.0145],
         [0.0425],
         [0.0640],
         [0.0217],
         [0.0463],
         [0.0080]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3801], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': ten

validation_step
qid:  5a894bea55429946c8d6e910
para_indexes:  tensor([ 24,  82, 170, 339, 396, 505, 553, 659, 695, 757], device='cuda:0')
sp_para_output:  tensor([[[ 0.1816,  0.1190,  0.1663,  ..., -0.6185, -0.1553,  0.1323],
         [-0.0505,  0.1437,  0.2944,  ..., -0.3512,  0.1131,  0.0577],
         [ 0.0317,  0.1013, -0.0544,  ..., -0.6845, -0.0475,  0.1566],
         ...,
         [ 0.0025,  0.3109, -0.1055,  ..., -0.6485, -0.2654,  0.0429],
         [-0.0349,  0.3420, -0.0451,  ..., -0.3988, -0.0645, -0.1797],
         [ 0.0181,  0.3031, -0.0973,  ..., -0.5862, -0.0374, -0.2377]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0239],
         [ 0.0628],
         [ 0.0665],
         [ 0.0517],
         [-0.0304],
         [ 0.0644],
         [ 0.0432],
         [ 0.0272],
         [ 0.1511],
         [ 0.0552]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3147], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5ab28156554299449642c8ce
para_indexes:  tensor([  19,  173,  305,  409,  445,  539,  795,  875,  951, 1034],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.0681,  0.3078,  0.0008,  ..., -0.5568, -0.0348, -0.0266],
         [-0.0492, -0.1949, -0.1415,  ..., -0.5732,  0.0179,  0.0287],
         [-0.2382, -0.1947,  0.0251,  ..., -0.6422, -0.0963, -0.0013],
         ...,
         [-0.0874,  0.1886,  0.0378,  ..., -0.5850, -0.1487, -0.2006],
         [ 0.0093, -0.0059,  0.0053,  ..., -0.4642, -0.2792, -0.0392],
         [ 0.1563, -0.1240,  0.0991,  ..., -0.4965,  0.0392, -0.1154]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0177],
         [-0.0111],
         [-0.0593],
         [-0.0716],
         [ 0.0614],
         [-0.0386],
         [-0.0700],
         [-0.0152],
         [-0.0773],
         [-0.0270]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3342], device='cuda:0', dtype=torch.flo

validation_step
qid:  5adbdc8a55429944faac238e
para_indexes:  tensor([ 21,  87, 216, 293, 386, 503, 571, 617, 677, 715], device='cuda:0')
sp_para_output:  tensor([[[ 3.5334e-01, -2.5335e-01,  3.3820e-01,  ...,  5.6511e-02,
          -2.5644e-01, -6.1986e-02],
         [ 1.3609e-01,  2.0887e-01, -1.6516e-01,  ..., -3.6052e-01,
          -1.1765e-01,  2.9915e-01],
         [-1.2375e-02, -2.8756e-01,  2.1833e-01,  ...,  1.6174e-01,
          -2.8476e-01, -1.2826e-01],
         ...,
         [-1.8090e-01,  2.6499e-01, -2.0917e-01,  ..., -3.5516e-01,
          -4.8049e-02,  1.1189e-01],
         [-1.3081e-01,  2.9315e-01,  1.0666e-01,  ..., -3.2448e-04,
          -2.4756e-01, -1.5428e-01],
         [ 4.3562e-02, -7.7185e-02,  2.0579e-01,  ...,  1.5772e-01,
          -2.7110e-01,  1.4088e-01]]], device='cuda:0')
sp_para_output_t:  tensor([[[-0.0211],
         [ 0.0009],
         [-0.0318],
         [-0.0417],
         [-0.0765],
         [ 0.0238],
         [-0.0013],
         [ 0.0366],
   

sp_para_output:  tensor([[[-0.0371,  0.2224, -0.0957,  ..., -0.3919, -0.2371, -0.1271],
         [-0.0825, -0.0654,  0.0540,  ..., -0.4727, -0.1625,  0.0629],
         [ 0.0825,  0.1037,  0.1243,  ..., -0.0732, -0.4799, -0.1035],
         ...,
         [ 0.0090, -0.0704, -0.0435,  ..., -0.2333, -0.4355, -0.0919],
         [-0.3579, -0.0791,  0.0188,  ..., -0.2091, -0.3050, -0.1533],
         [-0.0623, -0.0778,  0.1163,  ..., -0.4507, -0.2422, -0.0834]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0184],
         [ 0.0691],
         [ 0.0082],
         [ 0.0384],
         [-0.0397],
         [-0.0330],
         [ 0.0434],
         [-0.0043],
         [ 0.0067],
         [-0.0127]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2991], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2991], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 1, 2

validation_step
qid:  5a74d7685542996c70cfae0a
para_indexes:  tensor([ 19, 120, 188, 226, 333, 357, 381, 453, 496, 636], device='cuda:0')
sp_para_output:  tensor([[[-0.2306,  0.2186, -0.0061,  ..., -0.4516, -0.0137,  0.0155],
         [-0.0474,  0.1854,  0.0010,  ..., -0.1076,  0.0059, -0.2185],
         [-0.2871, -0.0231,  0.0356,  ..., -0.2071, -0.2260, -0.1691],
         ...,
         [ 0.0110, -0.0248, -0.0524,  ..., -0.1617,  0.0412,  0.2380],
         [ 0.1233,  0.0251,  0.0732,  ..., -0.1973, -0.1959, -0.2131],
         [-0.0283, -0.1727,  0.1650,  ..., -0.0227, -0.2558, -0.2989]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0331],
         [ 0.0825],
         [ 0.0193],
         [-0.0296],
         [ 0.0239],
         [ 0.0232],
         [-0.0535],
         [ 0.0836],
         [ 0.0510],
         [ 0.0202]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2859], device='cuda:0', dtype=torch.float16), 'start_log

         0.1619,  0.3486], device='cuda:0')
para_sents_offset: [0, 2, 3, 5, 7, 8, 10, 13, 16, 18, 21]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([0, 2], device='cuda:0'), 7: tensor([1], device='cuda:0'), 8: tensor([0], device='cuda:0'), 9: tensor([0, 2], device='cuda:0')}
sp_para_pred: tensor([9, 6], device='cuda:0')
validation_step
qid:  5a810506554299260e20a202
para_indexes:  tensor([ 18, 122, 183, 228, 278, 316, 363, 425, 501, 563], device='cuda:0')
sp_para_output:  tensor([[[ 0.2544,  0.0517,  0.0754,  ..., -0.6370, -0.1969, -0.2687],
         [-0.0532,  0.0577, -0.0196,  ..., -0.0337, -0.2816, -0.3666],
         [-0.2537,  0.1109,  0.1391,  ..., -0.2572, -0.1714, -0.0770],
         ...,
    

validation_step
qid:  5ae2e0fd55429928c4239524
para_indexes:  tensor([  22,  149,  264,  413,  641,  756,  829,  920, 1036, 1089],
       device='cuda:0')
sp_para_output:  tensor([[[ 2.4028e-01,  2.1238e-04,  1.1240e-01,  ..., -4.5680e-01,
          -3.5668e-01,  5.9857e-02],
         [ 2.4138e-02,  1.9740e-01, -9.7441e-02,  ..., -4.4495e-01,
           1.1171e-01,  1.2270e-01],
         [ 3.7488e-02,  1.8370e-01, -1.5404e-01,  ..., -6.6157e-01,
           2.2147e-02,  4.3156e-01],
         ...,
         [ 7.9070e-02, -1.6977e-01,  2.2916e-01,  ..., -5.3147e-01,
           7.8032e-02,  1.6718e-01],
         [ 2.8269e-01,  7.5435e-02,  5.4192e-02,  ..., -7.6487e-01,
          -2.5454e-01,  2.3886e-01],
         [ 2.2867e-01, -1.7455e-01, -5.5773e-02,  ..., -5.9717e-01,
          -2.7621e-01, -5.8147e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[-0.0344],
         [ 0.0149],
         [-0.0645],
         [ 0.0038],
         [-0.0063],
         [-0.0450],
         [-0.0104],
      

validation_step
qid:  5abb291355429939ce03dd9e
para_indexes:  tensor([ 14,  70,  93, 153, 194, 255, 306, 385, 431], device='cuda:0')
sp_para_output:  tensor([[[-0.0014, -0.0453,  0.2624,  ..., -0.0391, -0.0629,  0.0993],
         [-0.2367,  0.0449,  0.2601,  ..., -0.1495,  0.0150,  0.2042],
         [-0.1632, -0.1979,  0.1493,  ...,  0.2088,  0.0147,  0.0016],
         ...,
         [-0.3518, -0.1387,  0.0832,  ...,  0.2934, -0.0983, -0.1502],
         [-0.2250, -0.3322,  0.0729,  ...,  0.2569, -0.2361, -0.3929],
         [-0.2939, -0.4207,  0.2179,  ...,  0.1111, -0.0443,  0.0240]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0130],
         [ 0.0946],
         [ 0.0767],
         [ 0.0255],
         [ 0.0286],
         [-0.0124],
         [-0.0067],
         [-0.0410],
         [ 0.0820]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3584], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit

s_to_p_map:  [0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 7, 8, 9]
para_sent_logits: tensor([0.0862, 0.1170, 0.0491], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.2523], device='cuda:0')
para_sents_offset: [0, 3]
evidence_candidates: {0: tensor([1], device='cuda:0')}
para_sent_logits: tensor([0.0472, 0.0632, 0.0255], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.2523, 0.1359], device='cuda:0')
para_sents_offset: [0, 3, 6]
evidence_candidates: {0: tensor([1], device='cuda:0'), 1: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.1232, 0.1149, 0.0288, 0.0696, 0.0869], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([0.2523, 0.1359, 0.4234], device='cuda:0')
para_sents_offset: [0, 3, 6, 11]
evidence_candidates: {0: tensor([1], device='cuda:0'), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([0, 1], device='cuda:0')}
para_sent_logits: tensor([0.1545, 0.1265], devi

validation_step
qid:  5abbb116554299642a094b1d
para_indexes:  tensor([ 19,  97, 168, 217, 290, 389, 437, 510, 586, 648], device='cuda:0')
sp_para_output:  tensor([[[ 0.0307,  0.0103,  0.0204,  ..., -0.1870,  0.0166,  0.2270],
         [-0.0474, -0.0062,  0.0234,  ..., -0.4586,  0.1702,  0.2851],
         [-0.0947,  0.0584, -0.0721,  ..., -0.4809, -0.1177,  0.3233],
         ...,
         [ 0.0420, -0.3343, -0.0492,  ..., -0.0690,  0.0048,  0.3439],
         [ 0.0255, -0.2072,  0.1321,  ..., -0.5083,  0.0876,  0.4078],
         [-0.0773, -0.1194,  0.1801,  ..., -0.2086,  0.0874,  0.3351]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0229],
         [0.0810],
         [0.0412],
         [0.0682],
         [0.0565],
         [0.0040],
         [0.1237],
         [0.1143],
         [0.0665],
         [0.0464]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3411], device='cuda:0', dtype=torch.float16), 'start_logit': -1000

sp_para_output:  tensor([[[-0.0432,  0.0294, -0.0752,  ..., -0.1130, -0.3493,  0.0278],
         [-0.0065,  0.1661,  0.1710,  ..., -0.4474, -0.3765, -0.1139],
         [-0.1828,  0.0462,  0.0104,  ..., -0.4022, -0.3180,  0.2630],
         ...,
         [-0.1870,  0.0206,  0.1393,  ..., -0.5699, -0.2718, -0.0616],
         [-0.0970,  0.0039, -0.0357,  ..., -0.2155, -0.5830, -0.1056],
         [-0.1502,  0.2881, -0.2552,  ...,  0.0705, -0.1314,  0.0379]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0691],
         [ 0.0053],
         [ 0.1292],
         [ 0.0568],
         [-0.0092],
         [ 0.0377],
         [ 0.0319],
         [ 0.0457],
         [ 0.0183],
         [ 0.1250]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3093], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3093], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 2, 2, 3

sp_para_output:  tensor([[[ 0.1001,  0.4145,  0.0026,  ..., -0.5516, -0.1405,  0.3521],
         [ 0.1045,  0.0911, -0.1229,  ..., -0.9554, -0.1275, -0.0360],
         [ 0.2214,  0.3519,  0.2757,  ..., -0.5842, -0.4282, -0.2274],
         ...,
         [ 0.0699, -0.0669,  0.1902,  ..., -0.4608,  0.0552,  0.4010],
         [ 0.0486,  0.0418,  0.0052,  ..., -0.1636, -0.2748, -0.1047],
         [ 0.0560,  0.2019,  0.0412,  ..., -0.5879, -0.1234, -0.0398]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0012],
         [ 0.0231],
         [-0.0103],
         [ 0.0236],
         [-0.0010],
         [ 0.0500],
         [ 0.0639],
         [-0.0093],
         [ 0.0781],
         [ 0.0238]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3276], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3276], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 0, 0

validation_step
qid:  5a8a57fe55429930ff3c0da0
para_indexes:  tensor([  19,  124,  272,  340,  421,  575,  641,  730,  888, 1015],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.3199, -0.0035,  0.2648,  ..., -1.0266,  0.1053,  0.1427],
         [ 0.1440,  0.1254,  0.1373,  ..., -0.9197, -0.0452, -0.2986],
         [ 0.1668,  0.1908,  0.0805,  ..., -0.7284, -0.2158, -0.0458],
         ...,
         [-0.0900,  0.0863,  0.0902,  ..., -0.7822,  0.0716,  0.2634],
         [ 0.1551,  0.2195,  0.0995,  ..., -0.8268, -0.3871, -0.3233],
         [ 0.0538,  0.1371, -0.0187,  ..., -0.3233,  0.1494,  0.0116]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0447],
         [ 0.0909],
         [-0.0258],
         [ 0.0248],
         [ 0.0102],
         [ 0.1193],
         [ 0.0801],
         [ 0.1025],
         [ 0.0227],
         [ 0.0792]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3101], device='cuda:0', dtype=torch.flo

validation_step
qid:  5a8aed1755429950cd6afbf1
para_indexes:  tensor([ 15,  76, 146, 268, 308, 334, 398, 461, 552, 569], device='cuda:0')
sp_para_output:  tensor([[[-0.1080,  0.0928,  0.3967,  ...,  0.0925, -0.0353,  0.1091],
         [-0.0529,  0.1065,  0.2340,  ..., -0.3734, -0.0705,  0.0369],
         [-0.0231, -0.0819,  0.0905,  ...,  0.1800, -0.1246, -0.1090],
         ...,
         [-0.0741,  0.0724,  0.2152,  ...,  0.1384,  0.0209, -0.0799],
         [ 0.0950,  0.3384,  0.3073,  ...,  0.0682, -0.0518, -0.0366],
         [ 0.1727, -0.0180,  0.2327,  ..., -0.2106, -0.2021,  0.0970]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0609],
         [ 0.0714],
         [ 0.0378],
         [-0.0176],
         [-0.0145],
         [ 0.0306],
         [ 0.0260],
         [ 0.0663],
         [ 0.0441],
         [ 0.0562]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3635], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 1.5271e-01, -6.9363e-02,  8.3211e-02,  ..., -7.8714e-01,
          -4.4824e-01, -1.8707e-01],
         [ 5.8404e-02, -1.6647e-01,  1.4409e-02,  ..., -9.6323e-01,
          -5.2464e-02,  1.8565e-01],
         [ 2.0728e-02,  6.2560e-01, -2.2795e-01,  ..., -9.4676e-01,
          -3.1755e-01, -7.4321e-02],
         ...,
         [ 1.5908e-01,  3.1557e-02,  3.8028e-02,  ..., -6.4492e-01,
          -3.9273e-01, -3.5808e-02],
         [-3.5087e-02,  8.9702e-04,  1.0333e-01,  ..., -4.7508e-01,
          -2.5745e-01,  1.1416e-01],
         [-2.2598e-02,  4.3052e-01, -1.7227e-01,  ..., -6.5073e-01,
          -1.8523e-01, -4.0611e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0522],
         [ 0.0331],
         [-0.0916],
         [ 0.1256],
         [ 0.0279],
         [ 0.0113],
         [ 0.0588],
         [ 0.0401],
         [ 0.0099],
         [-0.0386]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor(

validation_step
qid:  5ac45b23554299204fd21f31
para_indexes:  tensor([ 35, 166, 315, 402, 495, 564, 649, 672, 750, 799], device='cuda:0')
sp_para_output:  tensor([[[ 0.4018, -0.3139,  0.3015,  ..., -0.5121, -0.1390,  0.2255],
         [ 0.3556,  0.2695,  0.0536,  ..., -0.2795, -0.3315, -0.2893],
         [ 0.2231,  0.0656,  0.3256,  ..., -0.4350, -0.0049, -0.1211],
         ...,
         [ 0.0154,  0.3305,  0.0570,  ..., -0.1993,  0.0334, -0.2428],
         [ 0.1214,  0.1775,  0.0351,  ..., -0.5009,  0.1764, -0.0677],
         [ 0.3803,  0.3072,  0.1786,  ..., -0.6843, -0.0685, -0.1544]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0104],
         [-0.0267],
         [-0.0836],
         [-0.0634],
         [-0.0081],
         [ 0.0355],
         [-0.0045],
         [ 0.0750],
         [-0.0208],
         [-0.0944]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3879], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 0.1334,  0.1515,  0.2417,  ..., -0.2434, -0.4938, -0.0008],
         [-0.0944,  0.3450, -0.1278,  ..., -0.4807, -0.0665, -0.0826],
         [ 0.0815, -0.0151,  0.1460,  ..., -0.2944, -0.2078,  0.1304],
         ...,
         [-0.0412, -0.0191,  0.0727,  ...,  0.0044, -0.0363,  0.0820],
         [-0.1994, -0.1682,  0.3676,  ..., -0.0405, -0.1980,  0.2116],
         [-0.2373,  0.0456,  0.0528,  ...,  0.0390, -0.0969,  0.1211]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0017],
         [-0.0269],
         [ 0.0264],
         [ 0.0071],
         [ 0.0731],
         [ 0.0229],
         [ 0.0441],
         [ 0.0211],
         [ 0.0465],
         [ 0.0312]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2554], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2554], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1

evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([0], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64), 9: tensor([], device='cuda:0', dtype=torch.int64)}
sp_para_pred: tensor([1, 5], device='cuda:0')
validation_step
qid:  5ab9e477554299232ef4a25d
para_indexes:  tensor([ 17,  48, 118, 278, 329, 366, 428, 544, 640, 774], device='cuda:0')
sp_para_output:  tensor([[[ 0.1743, -0.0750,  0.1392,  ..., -0.1708, -0.1126,  0.1708],
         [ 0.0675, -0.1059,  0.0728,  ..., -0.0311, -0.1121,  0.0582],
         [ 0.0936, -0.0521,  0.1251,  ..., -0.3395, -0.0630, -0.1776],
         ...,
         [-0.0517, -0.0858,  0.1368,  ..., -0.0898, -0.321

para_sent_logits: tensor([0.0131, 0.0408], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1096, 0.2161, 0.0656, 0.1748, 0.0539], device='cuda:0')
para_sents_offset: [0, 2, 5, 8, 11, 13]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([0], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([0], device='cuda:0'), 4: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.1262, 0.0366, 0.0477], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.1096, 0.2161, 0.0656, 0.1748, 0.0539, 0.2105], device='cuda:0')
para_sents_offset: [0, 2, 5, 8, 11, 13, 16]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([0], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([0], device='cuda:0'), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([0], device='cuda:0')}
para_sent_logits: tensor([0.0067], device='cuda

validation_step
qid:  5a8484495542997175ce1ee8
para_indexes:  tensor([ 24,  73, 179, 242, 497, 568, 624, 753, 846, 922], device='cuda:0')
sp_para_output:  tensor([[[ 0.1058, -0.0829, -0.0782,  ..., -0.3180, -0.1209,  0.0608],
         [ 0.0576, -0.0728,  0.1126,  ..., -0.3811, -0.1564,  0.2931],
         [ 0.0567, -0.0239, -0.1398,  ..., -0.7276, -0.3590,  0.0238],
         ...,
         [-0.2390, -0.2910,  0.0885,  ..., -0.2424, -0.3039,  0.3159],
         [-0.0325, -0.0646, -0.0751,  ..., -0.4086, -0.2381,  0.1613],
         [-0.1584,  0.0911,  0.0093,  ..., -0.6479, -0.3648,  0.0143]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0787],
         [ 0.1443],
         [ 0.0457],
         [ 0.0015],
         [ 0.1089],
         [ 0.1251],
         [ 0.0060],
         [-0.0069],
         [ 0.0023],
         [ 0.0653]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3369], device='cuda:0', dtype=torch.float16), 'start_log

s_to_p_map:  [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9]
para_sent_logits: tensor([ 0.0502, -0.0182, -0.0269, -0.0747], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0697], device='cuda:0')
para_sents_offset: [0, 4]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0404,  0.0571,  0.0271,  0.0396], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0697,  0.0834], device='cuda:0')
para_sents_offset: [0, 4, 8]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0296,  0.0392, -0.0145], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0697,  0.0834,  0.0544], device='cuda:0')
para_sents_offset: [0, 4, 8, 11]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.i

sp_para_output:  tensor([[[ 0.1553,  0.1561,  0.1615,  ..., -0.0545, -0.2930,  0.2650],
         [ 0.0301,  0.1366, -0.0449,  ..., -0.0665, -0.2595,  0.0241],
         [-0.2230,  0.3305, -0.0508,  ..., -0.1420,  0.3486,  0.1515],
         ...,
         [-0.1869,  0.1740, -0.1322,  ...,  0.0106, -0.3405,  0.0154],
         [ 0.0392,  0.4257,  0.0848,  ..., -0.2554, -0.5333, -0.2082],
         [-0.0389,  0.1098,  0.1425,  ..., -0.3241, -0.0542,  0.1169]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0568],
         [-0.0298],
         [-0.0787],
         [-0.0834],
         [ 0.0953],
         [ 0.0389],
         [-0.0700],
         [-0.0829],
         [-0.1312],
         [ 0.0271]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3604], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3604], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 2, 2

para_sent_logits: tensor([-0.0333,  0.0089], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.1117,  0.0607,  0.2004, -0.0297,  0.1405, -0.0244], device='cuda:0')
para_sents_offset: [0, 5, 7, 11, 12, 16, 18]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0173, -0.0047, -0.0038, -0.0639], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.1117,  0.0607,  0.2004, -0.0297,  0.1405, -0.0244, -0.0898],
       device='cuda:0')
para_sents_offset: [0, 5, 7, 11, 12, 16, 18, 22]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.

sp_para_output:  tensor([[[ 0.3478, -0.0301,  0.0175,  ..., -0.7462, -0.1459,  0.1146],
         [ 0.2602,  0.1581, -0.1206,  ..., -0.5851, -0.1426, -0.0202],
         [ 0.0457,  0.0676,  0.1290,  ..., -0.6484, -0.0101,  0.1456],
         ...,
         [-0.0630,  0.1728, -0.1428,  ..., -0.6063,  0.0891,  0.0732],
         [-0.1465,  0.0151,  0.0430,  ..., -0.8965,  0.2158,  0.0716],
         [ 0.1953, -0.0318,  0.0414,  ..., -0.4427,  0.2276,  0.3615]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0139],
         [ 0.0332],
         [ 0.0503],
         [ 0.0334],
         [ 0.0293],
         [-0.0422],
         [ 0.0259],
         [ 0.1251],
         [-0.0619],
         [-0.0101]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.4160], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.4160], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 1

validation_step
qid:  5ab8494d55429916710eb016
para_indexes:  tensor([  22,  196,  307,  447,  538,  653,  776,  859, 1035, 1144],
       device='cuda:0')
sp_para_output:  tensor([[[-0.2053,  0.1806, -0.0771,  ..., -0.4048,  0.2362,  0.1408],
         [ 0.0854, -0.1240, -0.0973,  ..., -0.7236, -0.0608,  0.2498],
         [ 0.1858, -0.0873, -0.0331,  ..., -0.9469, -0.0463, -0.0618],
         ...,
         [-0.0632,  0.0346,  0.1149,  ..., -0.7452, -0.0567,  0.1920],
         [ 0.1019,  0.3025, -0.1177,  ..., -0.5828,  0.3435,  0.3582],
         [ 0.1774,  0.0629, -0.0691,  ..., -0.6713, -0.0871, -0.1058]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0453],
         [ 0.0273],
         [-0.0195],
         [ 0.0306],
         [ 0.0593],
         [ 0.0656],
         [ 0.0189],
         [ 0.0582],
         [ 0.0385],
         [ 0.0464]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3176], device='cuda:0', dtype=torch.flo

validation_step
qid:  5a7b48b15542995eb53be8eb
para_indexes:  tensor([ 23,  83, 170, 260, 351, 443, 509, 622, 763, 882], device='cuda:0')
sp_para_output:  tensor([[[ 2.7254e-01, -3.8072e-01, -7.7379e-02,  ...,  2.3035e-04,
          -1.3934e-01,  2.2338e-01],
         [ 1.1606e-02,  4.8489e-02,  1.2648e-01,  ...,  1.7606e-01,
           3.6159e-02,  2.3671e-01],
         [-1.5155e-01,  1.3877e-01, -1.4439e-01,  ..., -3.8051e-01,
           1.0854e-01,  2.0196e-01],
         ...,
         [-6.7177e-02, -6.0815e-02, -1.3838e-02,  ...,  2.0387e-02,
          -1.1172e-01,  2.1320e-01],
         [-1.1485e-01,  8.8387e-03, -4.9818e-02,  ...,  1.0173e-01,
          -7.5863e-02, -6.1025e-02],
         [ 5.7156e-02, -1.2539e-01, -2.3703e-01,  ..., -6.7666e-02,
          -2.9235e-01,  3.1340e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[-0.0484],
         [ 0.0256],
         [-0.0126],
         [-0.0513],
         [-0.0164],
         [ 0.0139],
         [ 0.0587],
         [ 0.0034],
   

validation_step
qid:  5ab90d2755429916710eb0f0
para_indexes:  tensor([  25,   53,  230,  279,  488,  605,  822,  902, 1011, 1123],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.1379, -0.1937,  0.0319,  ..., -0.7560, -0.1834,  0.0387],
         [ 0.1137,  0.0261, -0.0046,  ..., -0.8167, -0.0167, -0.0236],
         [ 0.0353, -0.0297, -0.0724,  ..., -0.6158, -0.2344, -0.0884],
         ...,
         [-0.2499, -0.0986,  0.0101,  ..., -0.5123, -0.0742, -0.1230],
         [-0.0676,  0.1134,  0.0424,  ..., -0.2599,  0.2142, -0.3313],
         [ 0.1205,  0.0173,  0.0423,  ..., -0.6766, -0.1085,  0.0382]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0989],
         [ 0.0639],
         [-0.0363],
         [ 0.0040],
         [ 0.0925],
         [ 0.0683],
         [ 0.0434],
         [-0.0941],
         [ 0.1676],
         [ 0.0641]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3818], device='cuda:0', dtype=torch.flo

para_sent_logits: tensor([-0.0082,  0.0295,  0.0353], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0467,  0.0892,  0.2385, -0.0901, -0.0395,  0.0482,  0.0061, -0.0647,
         0.0566], device='cuda:0')
para_sents_offset: [0, 3, 6, 10, 11, 12, 13, 14, 18, 21]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([0], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.1115,  0.0283, -0.0093,  0.0283,  0.0736], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0467,  0.0892,  0.2385, -0.0901, -0.0395,  0.0482,  0.0061, -0.0647,
         0.0566,  0.2324], devic

sp_para_output:  tensor([[[ 0.2549,  0.0730,  0.1389,  ...,  0.0725, -0.2067,  0.1661],
         [ 0.0857, -0.0749,  0.0653,  ..., -0.4307, -0.4014,  0.1443],
         [-0.0007,  0.0172, -0.2188,  ..., -0.2380, -0.1002,  0.0421],
         ...,
         [-0.0758,  0.4339, -0.0660,  ..., -0.5668, -0.4737, -0.1897],
         [ 0.0486,  0.2857, -0.1453,  ..., -0.4893, -0.0348, -0.0483],
         [ 0.0166,  0.1008,  0.1941,  ..., -0.0620,  0.3015,  0.2147]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0804],
         [ 0.1113],
         [ 0.0523],
         [ 0.0316],
         [-0.0125],
         [ 0.1261],
         [ 0.1091],
         [ 0.0738],
         [ 0.0913],
         [ 0.0662]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3901], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3901], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 1

validation_step
qid:  5a7781c955429949eeb29ea8
para_indexes:  tensor([ 23, 107, 175, 305, 443, 492, 614, 668, 703, 791], device='cuda:0')
sp_para_output:  tensor([[[ 0.3754, -0.1675,  0.3469,  ...,  0.1787, -0.3881,  0.1447],
         [ 0.0797,  0.0672,  0.1987,  ..., -0.2381, -0.1816,  0.5860],
         [ 0.1689,  0.0537,  0.1884,  ..., -0.1663, -0.3165,  0.5589],
         ...,
         [ 0.3040,  0.1572,  0.0118,  ..., -0.2761, -0.3450,  0.0700],
         [ 0.1267,  0.1538,  0.1997,  ..., -0.6252, -0.0719,  0.2089],
         [-0.1921,  0.3292, -0.1862,  ..., -0.2555, -0.0700,  0.5551]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0374],
         [ 0.0366],
         [-0.0379],
         [-0.0348],
         [ 0.0076],
         [ 0.0643],
         [ 0.0787],
         [-0.0225],
         [ 0.0338],
         [-0.0612]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3386], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5ac19ff35542991316484b61
para_indexes:  tensor([ 19,  81, 191, 338, 565, 618, 694, 732, 792, 920], device='cuda:0')
sp_para_output:  tensor([[[ 2.9847e-01, -3.8713e-01,  8.0779e-02,  ..., -6.4562e-01,
          -3.3338e-02,  2.1672e-01],
         [ 6.0196e-02, -2.2385e-01,  1.9421e-02,  ...,  3.5370e-01,
           1.2907e-01,  7.0431e-02],
         [-2.4256e-01, -1.1444e-01,  3.0651e-01,  ..., -5.0099e-01,
          -5.6731e-04,  5.4381e-02],
         ...,
         [-1.9370e-01,  2.0918e-02,  1.1072e-01,  ..., -5.9890e-01,
          -1.6342e-01, -3.4794e-02],
         [-4.1812e-02,  6.6888e-02,  2.3135e-01,  ..., -8.1508e-01,
           1.1237e-01,  1.7553e-02],
         [ 1.1747e-01, -3.9070e-02, -2.6849e-02,  ..., -8.0883e-01,
          -2.2648e-02,  3.4765e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0091],
         [ 0.1094],
         [ 0.0758],
         [ 0.0136],
         [ 0.0681],
         [ 0.0923],
         [ 0.0266],
         [ 0.0342],
   

para_sent_logits_sum: tensor([ 0.0330, -0.0410, -0.0117, -0.0381,  0.0211], device='cuda:0')
para_sents_offset: [0, 3, 4, 5, 6, 8]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0335,  0.0274], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0330, -0.0410, -0.0117, -0.0381,  0.0211, -0.0062], device='cuda:0')
para_sents_offset: [0, 3, 4, 5, 6, 8, 10]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0532, -0

validation_step
qid:  5a808c215542995d8a8ddf90
para_indexes:  tensor([ 19,  65, 166, 194, 219, 279, 440, 484, 681, 765], device='cuda:0')
sp_para_output:  tensor([[[ 0.1793, -0.0721,  0.0632,  ..., -0.6858, -0.3027,  0.1233],
         [ 0.1178,  0.0102, -0.0103,  ..., -0.4323, -0.3251, -0.0737],
         [-0.2233,  0.4964, -0.0606,  ..., -0.1429, -0.5385, -0.1986],
         ...,
         [ 0.0481,  0.1098, -0.0053,  ..., -0.3070, -0.1748, -0.2636],
         [-0.2156,  0.4223,  0.0269,  ..., -0.2380, -0.5680, -0.2414],
         [-0.3407, -0.1034, -0.1547,  ...,  0.1416, -0.0696,  0.0206]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0766],
         [ 0.0805],
         [-0.0375],
         [ 0.0862],
         [ 0.0776],
         [-0.0145],
         [-0.0339],
         [ 0.0659],
         [-0.0306],
         [ 0.0352]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3999], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 0.1790,  0.3285,  0.1846,  ..., -1.0789, -0.2886,  0.0549],
         [ 0.1662,  0.0999,  0.1162,  ..., -0.6059, -0.1495,  0.0172],
         [ 0.0821,  0.2291,  0.0261,  ..., -1.1218, -0.1747,  0.0439],
         ...,
         [-0.1036,  0.2432, -0.0177,  ..., -0.5689, -0.1152, -0.0608],
         [-0.0620,  0.4462, -0.0014,  ..., -0.5480, -0.0030,  0.1213],
         [-0.0927,  0.3130,  0.0662,  ..., -1.1642, -0.2067,  0.2756]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0583],
         [ 0.0482],
         [ 0.0436],
         [ 0.0532],
         [-0.0544],
         [-0.0166],
         [-0.0980],
         [ 0.0047],
         [ 0.1431],
         [ 0.0096]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3223], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3223], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 2

validation_step
qid:  5a8b7ca65542997f31a41d49
para_indexes:  tensor([  18,  138,  301,  472,  636,  764,  932,  989, 1127, 1181],
       device='cuda:0')
sp_para_output:  tensor([[[-0.0024,  0.0556,  0.2995,  ..., -0.5578, -0.1908,  0.2639],
         [-0.0552, -0.1915, -0.0272,  ..., -0.1805, -0.3110,  0.0720],
         [-0.1494,  0.0446,  0.0537,  ..., -0.4053, -0.3051,  0.0104],
         ...,
         [-0.1636,  0.1856, -0.1089,  ...,  0.2846, -0.0623, -0.0491],
         [-0.1799,  0.2391,  0.0377,  ...,  0.0385, -0.0511,  0.0977],
         [-0.0684, -0.0771, -0.0895,  ..., -0.0541, -0.1260,  0.0574]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0248],
         [-0.0209],
         [-0.0714],
         [-0.0073],
         [ 0.0148],
         [ 0.0006],
         [-0.0403],
         [ 0.0605],
         [ 0.0765],
         [-0.0060]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3433], device='cuda:0', dtype=torch.flo

validation_step
qid:  5ab613985542992aa134a418
para_indexes:  tensor([ 23, 132, 200, 233, 270, 338, 498, 568, 623], device='cuda:0')
sp_para_output:  tensor([[[ 0.1895, -0.0160,  0.1098,  ..., -0.1267, -0.1297,  0.0861],
         [-0.2347,  0.3439,  0.0387,  ..., -0.4351, -0.1712,  0.0814],
         [-0.1541,  0.0287, -0.0876,  ..., -0.0999, -0.0526,  0.1301],
         ...,
         [ 0.0033, -0.0111,  0.1779,  ..., -0.4152, -0.0242,  0.1242],
         [ 0.1169, -0.0862,  0.3437,  ..., -0.7687, -0.0580,  0.2163],
         [ 0.0360,  0.0790,  0.1207,  ..., -0.5463, -0.2156,  0.0652]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0074],
         [0.0435],
         [0.0243],
         [0.0215],
         [0.0035],
         [0.0592],
         [0.0950],
         [0.0753],
         [0.0607]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3455], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -10000

validation_step
qid:  5a76a8bb5542993569682c76
para_indexes:  tensor([ 28, 107, 215, 258, 342, 374, 437, 564, 665], device='cuda:0')
sp_para_output:  tensor([[[ 1.2288e-01,  5.9039e-02,  1.4212e-01,  ..., -4.3296e-01,
          -3.7070e-01,  2.3259e-01],
         [ 4.2314e-03,  4.2283e-02,  1.6933e-01,  ..., -1.2854e-01,
          -3.6487e-01,  1.2627e-01],
         [-1.8782e-01,  2.4284e-01, -5.0663e-02,  ..., -2.6728e-01,
          -3.9433e-01, -1.5425e-01],
         ...,
         [ 2.2670e-02,  8.5033e-02,  3.5668e-02,  ..., -2.6964e-04,
          -6.1539e-01, -7.0595e-02],
         [ 2.0722e-01, -1.1747e-01,  1.2323e-01,  ..., -3.6253e-01,
          -3.8518e-01,  2.2134e-01],
         [ 1.1004e-01,  8.5623e-02, -4.7279e-02,  ..., -1.2888e-01,
          -4.8896e-01,  7.6443e-02]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0181],
         [-0.0182],
         [-0.0148],
         [-0.0601],
         [-0.0247],
         [-0.0358],
         [-0.0154],
         [ 0.0010],
        

sp_para_output:  tensor([[[-0.0194, -0.0759,  0.0234,  ..., -0.6345, -0.0957,  0.1433],
         [ 0.1550, -0.1598,  0.2208,  ..., -0.0527, -0.0320,  0.3594],
         [-0.1371,  0.1003, -0.0768,  ..., -0.2923, -0.2434,  0.0580],
         ...,
         [-0.0823, -0.0532, -0.0068,  ..., -0.0579, -0.3241,  0.3016],
         [-0.1833,  0.1025,  0.0130,  ..., -0.5544, -0.2031,  0.3178],
         [-0.3374, -0.1848, -0.0333,  ..., -0.2954, -0.1083,  0.0729]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0508],
         [ 0.0322],
         [-0.0117],
         [ 0.0066],
         [ 0.0714],
         [ 0.0489],
         [ 0.0247],
         [ 0.0058],
         [-0.0304],
         [-0.0146]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2917], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.2917], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 1

evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.1427, -0.0061,  0.0943, -0.0111], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0290,  0.2268,  0.0342, -0.0418,  0.0487, -0.0161,  0.2198],
       device='cuda:0')
para_sents_offset: [0, 3, 7, 10, 12, 16, 19, 23]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([0], device='cuda:0')}
para_sent_logits: tensor([0.0311], d

sp_para_output:  tensor([[[ 0.1483, -0.2834,  0.1302,  ..., -0.1463, -0.2274,  0.1378],
         [ 0.0229, -0.0226,  0.1014,  ..., -0.4434, -0.1879,  0.1709],
         [ 0.0407, -0.1839, -0.0286,  ..., -0.0850, -0.3017, -0.0319],
         ...,
         [ 0.1938, -0.3046,  0.0784,  ..., -0.2915, -0.0922, -0.1480],
         [ 0.1257, -0.1279, -0.0007,  ..., -0.6118, -0.0951,  0.0545],
         [ 0.0260, -0.0377, -0.1061,  ..., -0.4165, -0.3655,  0.0085]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0604],
         [ 0.0864],
         [ 0.0284],
         [ 0.0529],
         [-0.0605],
         [ 0.0098],
         [ 0.0245],
         [ 0.0037],
         [-0.0214],
         [-0.0455]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.4399], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.4399], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 2, 2

validation_step
qid:  5adcfb5f5542990d50227d81
para_indexes:  tensor([ 19,  37, 137, 188, 263, 374, 403, 472, 513, 555], device='cuda:0')
sp_para_output:  tensor([[[ 0.0118,  0.1804,  0.1308,  ..., -0.6960,  0.0301,  0.0953],
         [ 0.0480,  0.1199,  0.0864,  ..., -0.7943, -0.3289,  0.1381],
         [-0.2024, -0.1416,  0.0181,  ..., -0.4847, -0.2572, -0.0683],
         ...,
         [-0.2801, -0.2039, -0.1756,  ..., -0.4636, -0.1957,  0.0408],
         [ 0.1898,  0.0069,  0.0382,  ..., -0.2498, -0.1896,  0.2913],
         [-0.0747, -0.0901,  0.5076,  ..., -0.6231, -0.2980,  0.0957]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0851],
         [ 0.0645],
         [ 0.0230],
         [ 0.0646],
         [ 0.0034],
         [-0.0191],
         [ 0.0005],
         [ 0.1128],
         [ 0.0172],
         [ 0.0805]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.4421], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5a8c42445542996e8ac88a3f
para_indexes:  tensor([  35,  184,  274,  408,  498,  570,  682,  909,  998, 1060],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.0919,  0.6210,  0.1174,  ..., -1.3894,  0.1197,  0.0456],
         [-0.0719, -0.3338,  0.1104,  ..., -0.1441, -0.5059, -0.0927],
         [-0.1520,  0.1555,  0.0091,  ..., -0.2911, -0.2647, -0.1234],
         ...,
         [ 0.0285, -0.0710,  0.0276,  ..., -0.3868, -0.2309, -0.2224],
         [ 0.1120, -0.1583,  0.0054,  ..., -0.4212, -0.1560, -0.0719],
         [ 0.1234, -0.0079,  0.1244,  ..., -0.5618, -0.2492, -0.0845]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0240],
         [ 0.0320],
         [-0.0462],
         [ 0.0761],
         [ 0.0438],
         [ 0.0477],
         [-0.0232],
         [-0.0094],
         [ 0.0227],
         [-0.0047]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3479], device='cuda:0', dtype=torch.flo

validation_step
qid:  5a7f18a35542994959419aa6
para_indexes:  tensor([ 22, 121, 217, 246, 375, 453, 510, 597, 656, 742], device='cuda:0')
sp_para_output:  tensor([[[ 0.0350, -0.1190,  0.0144,  ..., -0.6414,  0.2012,  0.0628],
         [ 0.2480, -0.1273, -0.0124,  ..., -0.6726, -0.1026, -0.0950],
         [-0.2528, -0.2861, -0.0082,  ..., -0.6585,  0.0827,  0.0656],
         ...,
         [ 0.0690,  0.0715, -0.0450,  ..., -0.4959,  0.3543,  0.1399],
         [ 0.1919, -0.2331,  0.0205,  ..., -0.8844, -0.0763,  0.0628],
         [-0.0758,  0.0012, -0.0652,  ..., -0.5870,  0.0948,  0.1300]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0012],
         [ 0.0541],
         [ 0.0717],
         [ 0.0139],
         [ 0.0032],
         [ 0.1296],
         [ 0.0882],
         [ 0.0539],
         [-0.0182],
         [-0.0013]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.2910], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 0.0695, -0.0499,  0.0799,  ..., -0.0869, -0.1012, -0.2692],
         [-0.1784, -0.0687,  0.0902,  ..., -0.3910, -0.0639,  0.0187],
         [ 0.1671, -0.0709, -0.1275,  ..., -0.4007, -0.1322, -0.0479],
         ...,
         [ 0.0855,  0.1894, -0.0487,  ..., -0.4533, -0.1993,  0.0842],
         [ 0.1517, -0.0047,  0.0960,  ...,  0.0293, -0.1932,  0.0468],
         [ 0.2095, -0.1763,  0.1087,  ..., -0.1346, -0.2962, -0.1490]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0461],
         [-0.0131],
         [-0.0413],
         [-0.0114],
         [ 0.0310],
         [-0.0375],
         [-0.0458],
         [ 0.0014],
         [-0.0220],
         [ 0.0163]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3899], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3899], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1

para_sent_logits: tensor([ 0.0591, -0.0016], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0703, 0.0378, 0.1193, 0.0287, 0.0575], device='cuda:0')
para_sents_offset: [0, 3, 4, 6, 7, 9]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0458, 0.0268], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0703, 0.0378, 0.1193, 0.0287, 0.0575, 0.0727], device='cuda:0')
para_sents_offset: [0, 3, 4, 6, 7, 9, 11]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], devi

         0.1370,  0.0404], device='cuda:0')
para_sents_offset: [0, 3, 6, 7, 10, 13, 17, 19, 21, 23, 27]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([0], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([], device='cuda:0', dtype=torch.int64), 7: tensor([], device='cuda:0', dtype=torch.int64), 8: tensor([1], device='cuda:0'), 9: tensor([], device='cuda:0', dtype=torch.int64)}
sp_para_pred: tensor([8, 9], device='cuda:0')
validation_step
qid:  5ae095f055429924de1b7103
para_indexes:  tensor([ 23, 167, 204, 230, 258, 296, 329, 389, 446, 501], device='cuda:0')
sp_para_output:  tensor([[[ 0.0264,  0.2172, -0.1093,  ..., -0.6086, -0.0762,  0.0704],
         [-0.0125, -0.0953, -0.0635,  ..., -0.5338, -0.2210,  0.0908],
         [-0.1231, -0.1222,  0.0266,  ..., -0.4877, -0.11

validation_step
qid:  5a7f7e3a5542992097ad2f89
para_indexes:  tensor([ 30, 128, 189, 318, 471, 612, 681, 745, 801, 863], device='cuda:0')
sp_para_output:  tensor([[[ 0.1346, -0.0327,  0.1874,  ..., -0.7534,  0.0848,  0.3427],
         [ 0.2932, -0.2048,  0.0331,  ..., -0.1527, -0.1190,  0.1189],
         [ 0.1448, -0.1585, -0.0481,  ..., -0.4288, -0.4791, -0.0859],
         ...,
         [ 0.0397,  0.1658,  0.0253,  ..., -0.5859, -0.1451, -0.0616],
         [ 0.2046,  0.1018,  0.4047,  ..., -0.6171,  0.2287, -0.0867],
         [-0.1515,  0.2975, -0.1433,  ..., -0.2422,  0.2253,  0.0996]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0703],
         [0.1088],
         [0.0439],
         [0.0428],
         [0.0244],
         [0.1333],
         [0.0087],
         [0.0045],
         [0.0423],
         [0.0375]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3186], device='cuda:0', dtype=torch.float16), 'start_logit': -1000

validation_step
qid:  5a7570a65542996c70cfaefd
para_indexes:  tensor([ 32, 170, 361, 429, 520, 601, 917], device='cuda:0')
sp_para_output:  tensor([[[ 0.0255, -0.1543,  0.0852,  ..., -0.5959,  0.0321,  0.2361],
         [-0.0414, -0.0779, -0.1221,  ..., -0.7727, -0.0987,  0.2245],
         [-0.3135, -0.2590, -0.0806,  ..., -0.3726,  0.0366,  0.2695],
         ...,
         [ 0.0241, -0.1592,  0.0607,  ..., -0.6511, -0.0571,  0.1164],
         [ 0.1008, -0.0073, -0.0273,  ..., -0.7330, -0.1213,  0.1683],
         [-0.1866,  0.0136,  0.0416,  ..., -0.6230, -0.0029,  0.1264]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0527],
         [-0.0022],
         [-0.0305],
         [-0.0613],
         [-0.0650],
         [ 0.0330],
         [-0.0577]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3118], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3118], devi

validation_step
qid:  5a84c6df5542997b5ce3ff61
para_indexes:  tensor([ 21,  54, 136, 169, 216, 307, 328, 359, 419, 490], device='cuda:0')
sp_para_output:  tensor([[[ 0.3637,  0.1824,  0.1715,  ..., -0.4145, -0.2686, -0.1336],
         [ 0.1455,  0.0114, -0.0036,  ..., -0.3269, -0.2072,  0.1024],
         [ 0.1292, -0.0759,  0.1587,  ...,  0.0805, -0.0422, -0.0244],
         ...,
         [ 0.0450, -0.0612,  0.0348,  ..., -0.3333, -0.0594,  0.0970],
         [ 0.1787, -0.0907,  0.1996,  ..., -0.0139, -0.0032,  0.1656],
         [ 0.0269, -0.0470,  0.1359,  ..., -0.3318, -0.2056, -0.0543]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0006],
         [ 0.0293],
         [-0.0090],
         [-0.0015],
         [ 0.0715],
         [ 0.0024],
         [ 0.0285],
         [ 0.0412],
         [-0.0188],
         [ 0.0625]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3704], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5a832ad55542990548d0b1b5
para_indexes:  tensor([ 21,  68,  89, 138, 249, 362, 403, 492, 528, 581], device='cuda:0')
sp_para_output:  tensor([[[ 0.0881,  0.0079,  0.2933,  ..., -0.1115,  0.0681,  0.2503],
         [-0.3190, -0.2991,  0.0238,  ...,  0.1771, -0.1768,  0.1148],
         [ 0.0376, -0.0287,  0.2004,  ..., -0.2974,  0.1564,  0.0744],
         ...,
         [ 0.1475,  0.0211,  0.1050,  ..., -0.3904,  0.0036, -0.1338],
         [ 0.0105, -0.1694,  0.1793,  ..., -0.2478, -0.2502,  0.2608],
         [-0.1907,  0.0584,  0.2693,  ..., -0.1385, -0.0544, -0.0451]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0367],
         [ 0.0097],
         [ 0.0847],
         [ 0.0093],
         [-0.0175],
         [-0.0027],
         [-0.0646],
         [ 0.0360],
         [-0.0104],
         [ 0.0508]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3523], device='cuda:0', dtype=torch.float16), 'start_log

para_sent_logits_sum: tensor([0.0693, 0.1115, 0.3909], device='cuda:0')
para_sents_offset: [0, 1, 3, 11]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([4], device='cuda:0')}
para_sent_logits: tensor([0.0475], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0693, 0.1115, 0.3909, 0.0475], device='cuda:0')
para_sents_offset: [0, 1, 3, 11, 12]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([4], device='cuda:0'), 3: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0548,  0.0388, -0.0005,  0.0815,  0.0481, -0.0091,  0.0546,  0.0021],
       device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0693, 0.1115, 0.3909, 0.0475, 0.2703], device='cuda:0')
para_sents_offset: [0, 1, 3, 11, 12, 20]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.in

validation_step
qid:  5add3acb5542990dbb2f7ddc
para_indexes:  tensor([ 20, 108, 161, 259, 290, 319, 425, 499, 547], device='cuda:0')
sp_para_output:  tensor([[[ 1.9979e-01,  6.1526e-04,  2.4899e-02,  ..., -7.8292e-01,
          -3.2828e-01,  1.1708e-01],
         [ 1.6804e-01, -7.1841e-02, -5.3977e-02,  ..., -7.5549e-01,
          -1.4783e-02, -1.0640e-01],
         [ 5.3284e-02,  8.3853e-02, -3.7460e-03,  ..., -4.5506e-01,
          -9.4930e-02,  4.3718e-02],
         ...,
         [-2.7747e-01, -7.5752e-02,  5.8848e-02,  ..., -1.3598e-01,
           9.2723e-02,  3.7357e-01],
         [-7.7693e-02, -5.4203e-02, -4.0529e-02,  ..., -4.2336e-01,
          -3.0248e-02,  1.2413e-01],
         [-3.0867e-02,  2.6618e-01,  5.1477e-02,  ..., -2.3998e-01,
          -8.8888e-02,  1.3788e-01]]], device='cuda:0')
sp_para_output_t:  tensor([[[0.0272],
         [0.0968],
         [0.0362],
         [0.0055],
         [0.0083],
         [0.0091],
         [0.0453],
         [0.1055],
         [0.0503

validation_step
qid:  5ac3b9c5554299657fa2911b
para_indexes:  tensor([ 25,  58, 130, 258, 324, 390, 451, 525, 563, 603], device='cuda:0')
sp_para_output:  tensor([[[ 0.0213,  0.2185, -0.0575,  ..., -0.8425, -0.2969, -0.0444],
         [ 0.2936,  0.0520,  0.0420,  ..., -0.5337, -0.4426, -0.0068],
         [ 0.2008, -0.2775, -0.0522,  ..., -0.1544, -0.3153,  0.2504],
         ...,
         [-0.0143,  0.3280,  0.0332,  ..., -0.3685, -0.0863,  0.0876],
         [ 0.1793,  0.0804,  0.0804,  ..., -0.5685, -0.2865,  0.0652],
         [-0.0458, -0.0475,  0.0263,  ..., -0.4108, -0.0618,  0.1217]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0562],
         [ 0.0217],
         [ 0.0241],
         [-0.0060],
         [-0.0210],
         [-0.0139],
         [ 0.0686],
         [-0.0174],
         [ 0.0132],
         [ 0.0580]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3093], device='cuda:0', dtype=torch.float16), 'start_log

validation_step
qid:  5a7e689f55429934daa2fc1c
para_indexes:  tensor([ 33,  77, 130, 176, 323, 447, 549, 606, 653, 707], device='cuda:0')
sp_para_output:  tensor([[[-0.0917,  0.1523, -0.0991,  ..., -0.1837, -0.0690,  0.2021],
         [ 0.0983, -0.2400, -0.2241,  ..., -0.8447, -0.1528, -0.0983],
         [-0.1205,  0.3662, -0.0638,  ..., -0.5576, -0.2758, -0.1279],
         ...,
         [ 0.0977, -0.1214,  0.2502,  ..., -0.7805, -0.0403, -0.1732],
         [ 0.1271,  0.2498, -0.0995,  ..., -0.7036, -0.3651, -0.1223],
         [-0.1373,  0.1234, -0.0558,  ..., -0.1206,  0.3248,  0.1844]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0123],
         [0.0393],
         [0.0482],
         [0.0370],
         [0.0657],
         [0.1018],
         [0.0568],
         [0.1180],
         [0.0071],
         [0.0776]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3884], device='cuda:0', dtype=torch.float16), 'start_logit': -1000

para_sent_logits_sum: tensor([ 0.0916,  0.2809,  0.0292,  0.0331, -0.0080,  0.0823], device='cuda:0')
para_sents_offset: [0, 3, 7, 9, 10, 12, 15]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([0, 2], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([0], device='cuda:0')}
para_sent_logits: tensor([0.0349, 0.0737], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.0916,  0.2809,  0.0292,  0.0331, -0.0080,  0.0823,  0.1086],
       device='cuda:0')
para_sents_offset: [0, 3, 7, 9, 10, 12, 15, 17]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([0, 2], device='cuda:0'), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([0], device='cuda:0'), 6: tensor([], dev

sp_para_output:  tensor([[[ 0.2504, -0.1880,  0.2499,  ..., -0.3069, -0.3134,  0.0369],
         [ 0.1019, -0.2107,  0.1858,  ..., -0.4670,  0.0289,  0.0816],
         [ 0.0945, -0.0986, -0.0950,  ..., -0.4140, -0.2276, -0.0157],
         ...,
         [-0.2577, -0.1486,  0.1039,  ..., -0.2239, -0.1014, -0.0406],
         [-0.0628, -0.2457,  0.1304,  ..., -0.3211, -0.0819,  0.1980],
         [ 0.1691, -0.2043,  0.2560,  ..., -0.5070, -0.2261, -0.0109]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0363],
         [ 0.0938],
         [-0.0082],
         [ 0.0269],
         [-0.0048],
         [ 0.0088],
         [ 0.0256],
         [ 0.0952],
         [ 0.0440]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.4084], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.4084], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4, 

evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0170], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0032, -0.0473, -0.0452, -0.0183, -0.0299,  0.0170], device='cuda:0')
para_sents_offset: [0, 1, 2, 3, 4, 5, 6]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([], device='cuda:0', dtype=torch.int64), 5: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([-0.0193], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.0032, -0.0473, -0.0452, -0.0183, -0.0299,  0.0170, -0.0193],
       devi

validation_step
qid:  5ac3b8b8554299741d48a301
para_indexes:  tensor([ 19,  98, 153, 184, 355, 376, 419, 426, 450, 480], device='cuda:0')
sp_para_output:  tensor([[[ 0.2088,  0.1638,  0.0526,  ..., -0.3058, -0.1871,  0.1908],
         [-0.0378, -0.0884, -0.0082,  ..., -0.3398,  0.0325,  0.1145],
         [ 0.0237, -0.0963, -0.0442,  ..., -0.1997, -0.1603, -0.0377],
         ...,
         [-0.0979, -0.0803,  0.0429,  ..., -0.1565, -0.0803,  0.0882],
         [-0.2572,  0.0587,  0.0065,  ..., -0.3226, -0.0856,  0.1413],
         [-0.0198,  0.0420,  0.0315,  ..., -0.3208,  0.0363,  0.2143]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[0.0139],
         [0.0738],
         [0.0394],
         [0.0251],
         [0.0282],
         [0.0040],
         [0.0032],
         [0.1003],
         [0.1470],
         [0.0887]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3171], device='cuda:0', dtype=torch.float16), 'start_logit': -1000

s_to_p_map:  [0, 1, 2, 2, 2, 2, 3, 4, 4, 4, 5, 5, 6, 7, 8, 9, 9, 9, 9]
para_sent_logits: tensor([0.0723], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0723], device='cuda:0')
para_sents_offset: [0, 1]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0569], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0723, 0.0569], device='cuda:0')
para_sents_offset: [0, 1, 2]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0866,  0.0924,  0.0616, -0.0111], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([0.0723, 0.0569, 0.2296], device='cuda:0')
para_sents_offset: [0, 1, 2, 6]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor

validation_step
qid:  5a8712095542991e771816af
para_indexes:  tensor([ 20,  91, 154, 202, 336, 453, 548, 606, 701, 790], device='cuda:0')
sp_para_output:  tensor([[[ 0.2354,  0.1406,  0.1903,  ..., -0.5822, -0.1039,  0.1087],
         [ 0.0778, -0.1930,  0.1774,  ..., -0.3416,  0.0918, -0.0313],
         [ 0.0573, -0.1855,  0.0408,  ..., -0.7306, -0.1021,  0.2174],
         ...,
         [-0.0680, -0.0796,  0.0235,  ..., -0.5620, -0.0566,  0.3317],
         [ 0.0656, -0.3796,  0.0794,  ..., -0.5953, -0.1056,  0.0064],
         [-0.1820, -0.0853, -0.0580,  ..., -0.5011, -0.0833,  0.2903]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0344],
         [ 0.0108],
         [ 0.0038],
         [ 0.0690],
         [-0.0363],
         [ 0.0728],
         [ 0.0300],
         [ 0.0967],
         [ 0.0318],
         [-0.0795]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3347], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[-6.4828e-02,  2.9111e-01, -2.2761e-01,  ..., -7.1890e-01,
          -2.2969e-01,  1.4830e-02],
         [-1.3434e-01,  6.2418e-01, -4.7269e-01,  ..., -6.4317e-01,
           3.6095e-01,  2.2992e-02],
         [ 3.1656e-01, -1.1963e-02, -6.3581e-02,  ..., -5.8208e-01,
          -2.3930e-01,  7.3372e-02],
         ...,
         [ 2.1232e-02,  6.0040e-02, -1.3893e-01,  ..., -5.3706e-01,
          -1.4563e-01, -7.4821e-02],
         [ 3.9728e-02,  1.6816e-01,  2.1687e-03,  ..., -5.7676e-01,
          -7.1825e-02, -3.9788e-04],
         [ 2.3844e-01,  1.9584e-01, -1.1586e-01,  ..., -5.9378e-01,
          -1.1222e-01,  1.6808e-01]]], device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0934],
         [ 0.0853],
         [-0.0537],
         [-0.0381],
         [-0.0880],
         [ 0.0727],
         [ 0.0380],
         [ 0.0251],
         [-0.0317],
         [-0.0848]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor(

validation_step
qid:  5ae6793e5542991bbc9760e2
para_indexes:  tensor([  39,  239,  336,  461,  577,  689,  793,  927, 1073, 1168],
       device='cuda:0')
sp_para_output:  tensor([[[-0.2578,  0.2568,  0.1205,  ..., -1.0538, -0.2331,  0.3137],
         [ 0.0996,  0.0161,  0.0541,  ..., -0.7639, -0.2241,  0.1008],
         [-0.0347,  0.1896,  0.2057,  ..., -0.7237, -0.1385,  0.0874],
         ...,
         [ 0.1530, -0.0207,  0.0324,  ..., -0.7975, -0.3892,  0.0726],
         [ 0.0174,  0.3132, -0.0287,  ..., -0.5863,  0.2158,  0.4714],
         [ 0.1185,  0.2133, -0.0114,  ..., -0.4188, -0.4133, -0.2325]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0460],
         [ 0.0288],
         [ 0.0002],
         [ 0.0523],
         [ 0.0213],
         [-0.0137],
         [ 0.0082],
         [-0.0603],
         [-0.0028],
         [-0.0576]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3638], device='cuda:0', dtype=torch.flo

sp_para_output:  tensor([[[ 0.0283,  0.2333,  0.0398,  ..., -0.5618, -0.0212,  0.2458],
         [-0.1423,  0.1347,  0.0648,  ..., -0.2516, -0.1435,  0.1406],
         [-0.2030,  0.2127, -0.0232,  ..., -0.1730, -0.0888,  0.1571],
         ...,
         [-0.2896,  0.0785,  0.0383,  ..., -0.1536,  0.1336,  0.1835],
         [-0.2422, -0.1322,  0.1751,  ..., -0.0054,  0.0054,  0.0672],
         [-0.1163, -0.0945,  0.2247,  ..., -0.2249, -0.2401,  0.0774]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0470],
         [ 0.0705],
         [ 0.0448],
         [ 0.0428],
         [ 0.0511],
         [ 0.0760],
         [ 0.0972],
         [-0.0246]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3452], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3452], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 1, 1, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5, 6, 7]

sp_para_output:  tensor([[[-0.2594,  0.0670,  0.0926,  ..., -1.0360,  0.1329,  0.2571],
         [-0.1478,  0.5374, -0.1877,  ..., -0.9566,  0.1080,  0.2825],
         [-0.1082,  0.2161, -0.0757,  ..., -0.8383,  0.0603, -0.0540],
         ...,
         [ 0.0475,  0.0798,  0.1304,  ..., -0.3211, -0.2546, -0.0655],
         [ 0.1492,  0.0323, -0.0188,  ..., -0.8251, -0.3405,  0.0250],
         [-0.1765,  0.5744, -0.3370,  ..., -1.0249,  0.1042,  0.1350]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.1339],
         [ 0.0418],
         [ 0.0418],
         [ 0.0665],
         [-0.0391],
         [-0.0387],
         [ 0.0714],
         [ 0.0838],
         [ 0.0086],
         [-0.0408]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3799], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3799], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 1, 1, 2

validation_step
qid:  5a793ea55542994bb9456fce
para_indexes:  tensor([ 18,  59, 119, 195], device='cuda:0')
sp_para_output:  tensor([[[-0.0396, -0.0470,  0.1779,  ..., -0.5523,  0.2773,  0.1902],
         [-0.2421,  0.0457,  0.3748,  ..., -0.3288,  0.1304,  0.1954],
         [-0.2201,  0.0193,  0.3192,  ..., -0.7012,  0.0527, -0.0821],
         [-0.2653, -0.1078,  0.2868,  ..., -0.0779,  0.2365,  0.4787]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0216],
         [ 0.0817],
         [ 0.0363],
         [ 0.0232]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3408], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3408], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3]
para_sent_logits: tensor([-0.1152], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([-0.1152], device='cuda:0')
para_sents_off

validation_step
qid:  5ae3281a5542991a06ce9939
para_indexes:  tensor([ 19,  94, 155, 183, 237, 267, 317, 427, 591, 651], device='cuda:0')
sp_para_output:  tensor([[[ 0.2142,  0.0016, -0.0834,  ..., -0.8987, -0.1022,  0.2343],
         [ 0.0088,  0.0766,  0.0828,  ..., -0.5986, -0.1811,  0.2720],
         [-0.0193,  0.2134,  0.1686,  ..., -0.5220, -0.4367,  0.2779],
         ...,
         [-0.2216, -0.0834, -0.0418,  ..., -0.3841,  0.0011,  0.2338],
         [-0.0442,  0.0409,  0.1707,  ..., -0.6712,  0.0110,  0.1994],
         [ 0.2324, -0.1435, -0.0974,  ..., -0.4722, -0.2366,  0.0719]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0197],
         [ 0.1290],
         [-0.0072],
         [ 0.0358],
         [ 0.0804],
         [-0.0215],
         [ 0.0129],
         [ 0.0526],
         [ 0.0973],
         [ 0.0354]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3640], device='cuda:0', dtype=torch.float16), 'start_log

para_sent_logits_sum: tensor([ 0.1702, -0.0017,  0.0196,  0.1214,  0.1151,  0.0150], device='cuda:0')
para_sents_offset: [0, 2, 3, 5, 9, 11, 14]
evidence_candidates: {0: tensor([1], device='cuda:0'), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([0], device='cuda:0'), 5: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([ 0.0743,  0.1081, -0.0877], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([ 0.1702, -0.0017,  0.0196,  0.1214,  0.1151,  0.0150,  0.0947],
       device='cuda:0')
para_sents_offset: [0, 2, 3, 5, 9, 11, 14, 17]
evidence_candidates: {0: tensor([1], device='cuda:0'), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], device='cuda:0', dtype=torch.int64), 3: tensor([], device='cuda:0', dtype=torch.int64), 4: tensor([0], device='cuda:0'), 5: tensor([], device='cuda:0', dtype=torch.int64), 6: tensor([1],

validation_step
qid:  5ae2c4b8554299495565db31
para_indexes:  tensor([ 19,  83, 116, 205, 253, 406, 520, 593, 640, 726], device='cuda:0')
sp_para_output:  tensor([[[-0.1053,  0.2461,  0.0656,  ..., -0.6947, -0.2261, -0.1086],
         [-0.0844,  0.2142,  0.0392,  ..., -0.6198,  0.0757,  0.0208],
         [-0.0997,  0.1209,  0.0312,  ..., -0.4560, -0.1017, -0.0348],
         ...,
         [-0.1518,  0.1179,  0.2132,  ..., -0.2533, -0.0362, -0.0779],
         [-0.0720, -0.0408,  0.2300,  ..., -0.3759, -0.0626,  0.1694],
         [-0.3840,  0.1152,  0.0224,  ..., -0.1289,  0.0974,  0.0300]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[ 0.0360],
         [ 0.1405],
         [ 0.0596],
         [ 0.0756],
         [-0.0281],
         [ 0.0187],
         [ 0.0626],
         [ 0.1097],
         [ 0.0311],
         [ 0.0605]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3538], device='cuda:0', dtype=torch.float16), 'start_log

sp_para_output:  tensor([[[ 0.1271, -0.1241, -0.0244,  ..., -0.0786, -0.2288,  0.1591],
         [ 0.1586,  0.0855,  0.1464,  ..., -0.0856, -0.4103, -0.0853],
         [ 0.0838,  0.2517, -0.0206,  ..., -0.0635, -0.2754, -0.2030],
         ...,
         [-0.0178, -0.0854,  0.1455,  ...,  0.3026, -0.3787,  0.1865],
         [ 0.2170, -0.1700,  0.1635,  ..., -0.1316, -0.1628,  0.1072],
         [ 0.1903, -0.0969,  0.0095,  ..., -0.3877, -0.3299,  0.0130]]],
       device='cuda:0')
sp_para_output_t:  tensor([[[-0.0100],
         [ 0.0279],
         [-0.0500],
         [-0.0179],
         [-0.0550],
         [-0.0078],
         [-0.0142],
         [-0.0392],
         [ 0.0097],
         [ 0.0724]]], device='cuda:0', dtype=torch.float16)
decode
p_type:  1
answers:  [{'text': 'yes', 'score': tensor([0.3235], device='cuda:0', dtype=torch.float16), 'start_logit': -1000000, 'end_logit': -1000000, 'p_type_score': tensor([0.3235], device='cuda:0', dtype=torch.float16)}]
s_to_p_map:  [0, 0, 0, 1, 1

s_to_p_map:  [0, 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 7]
para_sent_logits: tensor([-0.0134, -0.0649,  0.0292,  0.0585,  0.0397], device='cuda:0',
       dtype=torch.float16)
para_sent_logits_sum: tensor([0.0491], device='cuda:0')
para_sents_offset: [0, 5]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0582], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0491, 0.0582], device='cuda:0')
para_sents_offset: [0, 5, 6]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64)}
para_sent_logits: tensor([0.0736, 0.0345, 0.0121], device='cuda:0', dtype=torch.float16)
para_sent_logits_sum: tensor([0.0491, 0.0582, 0.1202], device='cuda:0')
para_sents_offset: [0, 5, 6, 9]
evidence_candidates: {0: tensor([], device='cuda:0', dtype=torch.int64), 1: tensor([], device='cuda:0', dtype=torch.int64), 2: tensor([], dev


Epoch 00000: avg_val_f1 reached 0.03306 (best 0.03306), saving model to /xdisk/msurdeanu/fanluo/hotpotQA/Data/jupyter-hotpotqa/hotpotqa-longformer_jupyter/_ckpt_epoch_0.ckpt as top 5


before sync --> sizes:  605, 605, 605
after sync --> sizes: 605, 605, 605
avg_loss:  tensor(26.0685, device='cuda:0')	avg_answer_loss:  tensor(5.6174, device='cuda:0')	avg_type_loss:  tensor(1.2928, device='cuda:0')	avg_val_f1:  0.03305785123966942	avg_val_em:  0.03305785123966942	avg_val_prec:  0.03305785123966942	avg_val_recall:  0.03305785123966942
avg_val_sp_sent_f1:  tensor(0.0231, device='cuda:0')	avg_val_sp_sent_em:  tensor(0.0017, device='cuda:0')	avg_val_sp_sent_prec:  tensor(0.0299, device='cuda:0')	avg_val_sp_sent_recall:  tensor(0.0202, device='cuda:0')
avg_val_joint_f1:  tensor(0., device='cuda:0')	avg_val_joint_em:  tensor(0., device='cuda:0')	avg_val_joint_prec:  tensor(0., device='cuda:0')	avg_val_joint_recall:  tensor(0., device='cuda:0')




para_indexes:  tensor([ 13, 108, 151, 286, 366, 445, 493, 576, 663, 705], device='cuda:0')
sp_para_output:  tensor([[[ 0.2664, -0.0583,  0.2572,  ..., -0.2416, -0.1606, -0.0776],
         [ 0.2585, -0.2088, -0.1112,  ..., -0.6099, -0.2215, -0.1790],
         [ 0.3700, -0.0534, -0.0426,  ..., -0.3280,  0.2142,  0.0170],
         ...,
         [ 0.2022, -0.2643, -0.0242,  ..., -0.3269, -0.0853, -0.1623],
         [ 0.4907, -0.2572, -0.1964,  ...,  0.0535,  0.2122,  0.4208],
         [-0.2688, -0.1772,  0.0311,  ...,  0.5040, -0.6781, -0.2267]]],
       device='cuda:0', grad_fn=<IndexBackward>)
sp_para_output_t:  tensor([[[-0.0270],
         [ 0.0098],
         [-0.0814],
         [-0.0797],
         [-0.0699],
         [ 0.0189],
         [ 0.0531],
         [-0.0237],
         [-0.0089],
         [ 0.0680]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddBackward0>)
para_indexes:  tensor([  22,  275,  642,  711,  854,  954, 1016, 1262, 1479, 1603],
       device='cuda:0')
sp_

para_indexes:  tensor([  24,  145,  383,  527,  607,  680,  846,  944,  999, 1072],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.5276,  0.0040,  0.0729,  ..., -0.6513, -0.3174, -0.1562],
         [ 0.1952, -0.2785, -0.4133,  ..., -0.6020, -0.3584, -0.1997],
         [ 0.0871, -0.1591, -0.3426,  ..., -0.3181, -0.2055, -0.4354],
         ...,
         [-0.1067, -0.0908, -0.0496,  ..., -0.1844,  0.1144,  0.4266],
         [ 0.1868,  0.1316, -0.2481,  ..., -0.5932, -0.0403,  0.1298],
         [ 0.3690,  0.0268, -0.1540,  ..., -0.5421, -0.4566,  0.0219]]],
       device='cuda:0', grad_fn=<IndexBackward>)
sp_para_output_t:  tensor([[[ 0.1203],
         [ 0.0736],
         [-0.1406],
         [-0.0442],
         [ 0.0322],
         [-0.0097],
         [-0.0533],
         [-0.0316],
         [ 0.0573],
         [-0.0823]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddBackward0>)
para_indexes:  tensor([  19,  106,  186,  286,  389,  493,  737,  847,  981, 1064],
       dev

para_indexes:  tensor([  22,  123,  225,  306,  392,  519,  575,  803,  922, 1002],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.2080,  0.2085,  0.1662,  ..., -0.5029, -0.2453, -0.1844],
         [ 0.2033,  0.2753, -0.0631,  ..., -0.3572, -0.2548,  0.0601],
         [-0.0911,  0.0303,  0.1920,  ..., -0.3040,  0.1350,  0.0505],
         ...,
         [ 0.2399,  0.4422,  0.0113,  ..., -0.1617, -0.1093,  0.1198],
         [ 0.3417,  0.3362, -0.1435,  ..., -0.2144,  0.2548,  0.0566],
         [ 0.1826,  0.1804,  0.2121,  ..., -0.0693, -0.1352,  0.0736]]],
       device='cuda:0', grad_fn=<IndexBackward>)
sp_para_output_t:  tensor([[[-0.0451],
         [-0.0010],
         [-0.0278],
         [-0.0551],
         [-0.0385],
         [-0.0614],
         [-0.0257],
         [-0.1885],
         [-0.1042],
         [-0.0222]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddBackward0>)
para_indexes:  tensor([  31,   92,  193,  291,  328,  448,  602,  660,  983, 1154],
       dev

sp_para_output:  tensor([[[ 0.2410,  0.0687,  0.0430,  ..., -0.3658,  0.4884,  0.0949],
         [ 0.4063, -0.0758, -0.1350,  ..., -0.1342, -0.2184,  0.0239],
         [ 0.2746,  0.1691, -0.1098,  ...,  0.0487, -0.2145, -0.0267],
         ...,
         [ 0.7269,  0.1560,  0.3131,  ...,  0.4360, -0.3873,  0.0127],
         [ 0.5531,  0.3654,  0.0545,  ..., -0.3380,  0.0891, -0.1966],
         [ 0.3784,  0.2024, -0.0115,  ..., -0.5748, -0.1669, -0.2932]]],
       device='cuda:0', grad_fn=<IndexBackward>)
sp_para_output_t:  tensor([[[-0.0439],
         [ 0.0128],
         [-0.1125],
         [-0.1511],
         [ 0.0122],
         [-0.0479],
         [-0.1802],
         [-0.2595],
         [-0.0051],
         [-0.0509]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddBackward0>)
para_indexes:  tensor([  17,  164,  219,  457,  591,  882, 1138, 1299, 1438, 1534],
       device='cuda:0')
sp_para_output:  tensor([[[ 0.0862, -0.1139,  0.1691,  ..., -0.4558,  0.0210, -0.0235],
      

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

In [None]:
#     trainer.test(model)

In [None]:
### To install apex ### 
#     !git clone https://github.com/NVIDIA/apex
#     os.chdir("/xdisk/msurdeanu/fanluo/hotpotQA/apex/")
#     !module load cuda101/neuralnet/7/7.6.4  
#     !module load cuda10.1/toolkit/10.1.243 
#     !conda install -c conda-forge cudatoolkit-dev --yes
#     !conda install -c conda-forge/label/cf201901 cudatoolkit-dev --yes
#     !conda install -c conda-forge/label/cf202003 cudatoolkit-dev --yes
#     !which nvcc
#     !python -m pip install -v --no-cache-dir ./
#     os.chdir("/xdisk/msurdeanu/fanluo/hotpotQA/")

In [39]:
# debug: check args
import shlex
argString ='--train_dataset small.json --dev_dataset small_end.json  \
    --gpus 0 --num_workers 4 \
    --max_seq_len 4096 --doc_stride -1  \
    --save_prefix hotpotqa-longformer_jupyter  --model_path /xdisk/msurdeanu/fanluo/hotpotQA/longformer-base-4096'
# hotpot_dev_distractor_v1.json
#  --train_dataset /xdisk/msurdeanu/fanluo/hotpotQA/Data/reduced_questions/hotpot_reduced_context_04-08-2021-01:12:53/hotpot_train_reduced_context_coref_fuzzy.json --dev_dataset /xdisk/msurdeanu/fanluo/hotpotQA/Data/reduced_questions/hotpot_reduced_context_04-08-2021-01:12:53/hotpot_dev_reduced_context_coref_fuzzy.json  \ 

import argparse 
if __name__ == "__main__":
    main_arg_parser = argparse.ArgumentParser(description="hotpotqa")
    parser = hotpotqa.add_model_specific_args(main_arg_parser, os.getcwd())
    args = parser.parse_args(shlex.split(argString)) 
    for arg in vars(args):
        print((arg, getattr(args, arg)))
    main(args)


('save_dir', 'jupyter-hotpotqa')
('save_prefix', 'hotpotqa-longformer_jupyter')
('train_dataset', 'small.json')
('dev_dataset', 'small_end.json')
('batch_size', 2)
('gpus', '0')
('warmup', 1000)
('lr', 5e-05)
('val_every', 1.0)
('val_percent_check', 1.0)
('num_workers', 4)
('seed', 1234)
('epochs', 6)
('max_seq_len', 4096)
('max_doc_len', 4096)
('max_num_answers', 64)
('max_question_len', 55)
('doc_stride', -1)
('ignore_seq_with_no_answers', False)
('disable_checkpointing', False)
('n_best_size', 20)
('max_answer_length', 30)
('regular_softmax_loss', False)
('test', False)
('model_path', '/xdisk/msurdeanu/fanluo/hotpotQA/longformer-base-4096')
('no_progress_bar', False)
('attention_mode', 'sliding_chunks')
('fp32', False)
('train_percent', 1.0)


### Sandbox

In [None]:
import torch
from longformer.longformer import Longformer, LongformerConfig
from longformer.sliding_chunks import pad_to_window_size
from transformers import RobertaTokenizer

config = LongformerConfig.from_pretrained('/xdisk/msurdeanu/fanluo/hotpotQA/longformer-base-4096') 
# choose the attention mode 'n2', 'tvm' or 'sliding_chunks'
# 'n2': for regular n2 attantion
# 'tvm': a custom CUDA kernel implementation of our sliding window attention
# 'sliding_chunks': a PyTorch implementation of our sliding window attention
config.attention_mode = 'sliding_chunks'

In [None]:
model = Longformer.from_pretrained('/xdisk/msurdeanu/fanluo/hotpotQA/longformer-base-4096', config=config)
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
tokenizer.model_max_length = model.config.max_position_embeddings

In [None]:
SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000)  # long input document

input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0)  # batch of size 1

# TVM code doesn't work on CPU. Uncomment this if `config.attention_mode = 'tvm'`
model = model.cuda() 
input_ids = input_ids.cuda()

In [None]:
# Attention mask values -- 0: no attention, 1: local attention, 2: global attention
attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device) # initialize to local attention
attention_mask[:, [1, 4, 21,]] =  2  # Set global attention based on the task. For example,
                                     # classification: the <s> token
                                     # QA: question tokens

# padding seqlen to the nearest multiple of 512. Needed for the 'sliding_chunks' attention
input_ids, attention_mask = pad_to_window_size(
        input_ids, attention_mask, config.attention_window[0], tokenizer.pad_token_id)

In [None]:
output = model(input_ids, attention_mask=attention_mask)[0]
output

In [None]:

!nvidia-smi
!nvidia-smi -L

In [None]:
import torch
crossentropy_average = torch.nn.CrossEntropyLoss(reduction = 'mean', ignore_index=-1) 
crossentropy_weighted_average = torch.nn.CrossEntropyLoss(weight=torch.tensor([1.0, 4.0]), reduction = 'mean', ignore_index=-1) 

sp_para_output_t = torch.tensor([[
         [ 0.0227],
         [ 0.0365],
         [ 0.0054],
         [ 0.0401],
         [ 0.0590],
         [ 0.0134],
         [ 0.0336],
         [-0.0062],
         [ 0.0631],
         [ 0.0811]]])
sp_para_output_aux = torch.zeros(sp_para_output_t.shape, dtype=torch.float) 
predict_support_para = torch.cat([sp_para_output_aux, sp_para_output_t], dim=-1).view(-1,2)
# predict_support_para = torch.cat([sp_para_output_t, sp_para_output_aux], dim=-1).view(-1,2)
predict_support_para

sp_para =  torch.tensor([0, 0, 0, 0, 0, 1, 1, 0, 0, 0])
#sp_para_loss:  tensor(0.7061, device='cuda:0')

crossentropy_average(predict_support_para, sp_para)

crossentropy_weighted_average(predict_support_para, sp_para)

sp_para

positive_index = torch.nonzero(sp_para.view(-1)).view(-1).tolist()

from random import choice
choice([i for i in range(0,9) if i not in poositive_index])

set(range(10))

predict_support_para_1 = torch.tensor([[ 0.0000,  0.0227]])
sp_para_1 =  torch.tensor([0])
loss1 = crossentropy_average(predict_support_para_1, sp_para_1)
loss1

predict_support_para_2 = torch.tensor([[ 0.0000,  0.0365]])
sp_para_2 =  torch.tensor([0])
loss2 = crossentropy_average(predict_support_para_2, sp_para_2)
loss2

predict_support_para_3 = torch.tensor([[ 0.0000,  0.0054]])
sp_para_3 =  torch.tensor([0])
loss3 = crossentropy_average(predict_support_para_3, sp_para_3)
loss3

predict_support_para_4 = torch.tensor([[ 0.0000,  0.0401]])
sp_para_4 =  torch.tensor([0])
loss4 = crossentropy_average(predict_support_para_4, sp_para_4)
loss4

predict_support_para_5 = torch.tensor([[ 0.0000,  0.0590]])
sp_para_5 =  torch.tensor([0])
loss5 = crossentropy_average(predict_support_para_5, sp_para_5)
loss5

predict_support_para_6 = torch.tensor([[ 0.0000,  0.0134]])
sp_para_6 =  torch.tensor([1])
loss6 = crossentropy_average(predict_support_para_6, sp_para_6)
loss6

predict_support_para_7 = torch.tensor([[ 0.0000,  0.0336]])
sp_para_7 =  torch.tensor([1])
loss7 = crossentropy_average(predict_support_para_7, sp_para_7)
loss7

predict_support_para_8 = torch.tensor([[ 0.0000, -0.0062]])
sp_para_8 =  torch.tensor([0])
loss8 = crossentropy_average(predict_support_para_8, sp_para_8)
loss8

predict_support_para_9 = torch.tensor([[ 0.0000,  0.0631]])
sp_para_9 =  torch.tensor([0])
loss9 = crossentropy_average(predict_support_para_9, sp_para_9)
loss9

predict_support_para_10 = torch.tensor([[ 0.0000,  0.0811]])
sp_para_10 =  torch.tensor([0])
loss10 = crossentropy_average(predict_support_para_10, sp_para_10)
loss10

(loss1+loss2+loss3+loss4+loss5+loss6+loss7+loss8+loss9+loss10)/10

(loss1+loss2+loss3+loss4+loss5+4.0*loss6+4.0*loss7+loss8+loss9+loss10)/(8+2*4)

In [None]:
s_to_p_map = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 4, 5, 5, 5, 6, 7, 7, 7, 7, 8, 9]
sp_sent_logits = torch.tensor([[[-0.0143],
         [ 0.0416],
         [ 0.1065],
         [-0.0007],
         [ 0.0407],
         [ 0.1273],
         [ 0.0663],
         [ 0.0500],
         [ 0.0630],
         [ 0.1230],
         [ 0.0140],
         [ 0.0205],
         [-0.0505],
         [ 0.0224],
         [ 0.0907],
         [-0.0142],
         [ 0.0180],
         [ 0.0564],
         [-0.0132],
         [ 0.0016],
         [ 0.0327],
         [ 0.0687],
         [ 0.1049],
         [ 0.0568],
         [ 0.0740],
         [ 0.0006],
         [ 0.1159],
         [ 0.0566],
         [ 0.0543],
         [-0.0304],
         [ 0.1315],
         [-0.0143],
         [ 0.0444],
         [-0.0682],
         [ 0.1120],
         [ 0.1247]]])


In [None]:

if(len(s_to_p_map)>0):
    for i in range(s_to_p_map[-1]+1)
        sent_logits_torch.masked_select(sp_sent_logits.squeeze(), torch.tensor([p==0 for p in s_to_p_map]))

In [None]:
list(range(0))
