In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import math, copy, time
from torch.autograd import Variable
import matplotlib.pyplot as plt
from IPython.display import Image
import pandas as pd
import re
import jieba
from torchtext import data,datasets
import random
import os
from tqdm import tqdm
from torchtext.data import Iterator,BucketIterator
from torchtext.vocab import Vectors
from torch.nn import LSTM,LSTMCell,Linear
from transformers import *
import tokenize
import json

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
I0426 02:04:15.757966 140389550884608 file_utils.py:39] PyTorch version 1.0.1 available.
I0426 02:04:16.120102 140389550884608 modeling_xlnet.py:194] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .


In [2]:
#使用transformers中的模型加载，需要将bert_config.json 重命名为config.json
bert_model=BertModel.from_pretrained('./bert_pretrain/')
tokenizer = BertTokenizer.from_pretrained("./bert_pretrain/")

I0426 02:04:16.138382 140389550884608 configuration_utils.py:148] loading configuration file ./bert_pretrain/config.json
I0426 02:04:16.139438 140389550884608 configuration_utils.py:168] Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "is_decoder": false,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 16,
  "num_hidden_layers": 3,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "pruned_heads": {},
  "torchscript": false,
  "type_vocab_size": 2,
  "use_bfloat16": false,
  "vocab_size": 21128
}

I0426 02:04:16.141477 140389550884608 mode

In [3]:
BATCH_SIZE=16
CT_MAX_LEN=400
Q_MAX_LEN=100
A_MAX_LEN=10

In [4]:
train_path='./data/train_data.csv'
test_path='./data/train_test.csv'
dev_path='./data/train_dev.csv'

In [5]:
def build_dataset():
    pad_idx=0
    def padcontext(context):
        if len(context)>CT_MAX_LEN:
            context=context[:CT_MAX_LEN]
        else:
            context += [pad_idx for i in range(CT_MAX_LEN - len(context))]
        return context
        
    def padq(question):
        if len(question)>Q_MAX_LEN:
            question=question[:Q_MAX_LEN]
        else:
            question += [pad_idx for i in range(Q_MAX_LEN - len(question))]
        return question
    
    def pada(answer):
        if len(answer)>A_MAX_LEN:
            answer=answer[:A_MAX_LEN]
        else:
            answer += [pad_idx for i in range(A_MAX_LEN - len(answer))]
        return answer
    
    def load_train_dataset(path,num):
        contents = []
        csv_data=pd.read_csv(path,lineterminator='\n')
        i=0
        for id,question,context,answer,answer_start in zip(csv_data['id'],csv_data['question'],csv_data['context'],csv_data['answer'],csv_data['answer_start']):
            if len(context)>400:
                continue
            q_token=tokenizer.tokenize(question)
            q_token_ids=tokenizer.convert_tokens_to_ids(q_token)
            len_q=len(q_token_ids)
#             q_tokenids=padq(q_token_ids)
            
            c_token=tokenizer.tokenize(str(context))
            c_token_ids=tokenizer.convert_tokens_to_ids(c_token)
            len_c=len(c_token_ids)
#             c_tokenids=padcontext(c_token_ids)
            
            a_token=tokenizer.tokenize(str(answer))
            a_token_ids=tokenizer.convert_tokens_to_ids(a_token)
            len_a=len(a_token_ids)
            a_tokenids=pada(a_token_ids)
            
            answer_end=int(answer_start)+len_a
            contents.append((q_token_ids,c_token_ids,a_tokenids,len_q,len_c,len_a,answer_start,answer_end,c_token,id,a_token))
            if num!=0:
                i+=1
                if i>num:
                    break
        return contents
    
    def load_dev_dataset(path,num):
        contents = []
        csv_data=pd.read_csv(path,lineterminator='\n')
        i=0
        for id,question,context,answer in zip(csv_data['id'],csv_data['question'],csv_data['context'],csv_data['answer']):
            if len(context)>400:
                continue
            q_token=tokenizer.tokenize(question)
            q_token_ids=tokenizer.convert_tokens_to_ids(q_token)
            len_q=len(q_token_ids)
#             q_tokenids=padq(q_token_ids)
            
            c_token=tokenizer.tokenize(str(context))
            c_token_ids=tokenizer.convert_tokens_to_ids(c_token)
            len_c=len(c_token_ids)
#             c_tokenids=padcontext(c_token_ids)
    
            contents.append((q_token_ids,c_token_ids,len_q,len_c,c_token,answer,id))
            if num!=0:
                i+=1
                if i>num:
                    break
        return contents
            
    train = load_train_dataset(train_path,num=0)
    dev = load_dev_dataset(dev_path,num=0)
    return train, dev

In [6]:
class DatasetIterater(object):
    def __init__(self, dataset, batch_size, device):
        self.batch_size = batch_size
        self.dataset = dataset
        self.n_batches = len(dataset) // batch_size
        self.residue = False  # 记录batch数量是否为整数
        if len(dataset) % self.n_batches != 0:
            self.residue = True
        self.index = 0
        self.device = device
    
    def _to_tensor(self, datas):
        
        
        c_lens = [len(s[1]) for s in datas]
#         c_pad = torch.zeros(len(datas), max(c_lens)).long()
        c_pad = torch.zeros(len(datas), CT_MAX_LEN).long()
        for i, inp in enumerate(datas):
            c=inp[1]
            end = c_lens[i]
            c_pad[i, :end] = torch.LongTensor(c[end-1::-1])
            
        q_lens = [len(s[0]) for s in datas]
#         q_pad = torch.zeros(len(datas), max(q_lens)).long()
        q_pad = torch.zeros(len(datas), Q_MAX_LEN).long()
        for i, inp in enumerate(datas):
            q=inp[0]
            end = q_lens[i]
            q_pad[i, :end] = torch.LongTensor(q[end-1::-1])
        
        c_pad=c_pad.to(self.device)
        q_pad=q_pad.to(self.device)
#         q = torch.LongTensor([_[0] for _ in datas]).to(self.device)
#         c = torch.LongTensor([_[1] for _ in datas]).to(self.device)
        a = torch.LongTensor([_[2] for _ in datas]).to(self.device)

        # pad前的长度(超过pad_size的设为pad_size)
        q_len = torch.LongTensor([_[3] for _ in datas]).to(self.device)
        c_len = torch.LongTensor([_[4] for _ in datas]).to(self.device)
        a_len = torch.LongTensor([_[5] for _ in datas]).to(self.device)
        a_start = torch.LongTensor([_[6] for _ in datas]).to(self.device)
        a_end = torch.LongTensor([_[7] for _ in datas]).to(self.device)
        c_token=[_[8] for _ in datas]
        id=[_[9] for _ in datas]
        a_token=[_[10] for _ in datas]
        return (q_pad, c_pad, a,q_len,c_len,a_len,a_start,a_end,c_token,id,a_token)

    def __next__(self):
        if self.residue and self.index == self.n_batches:
            batches = self.dataset[self.index * self.batch_size: len(self.dataset)]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches

        elif self.index >= self.n_batches:
            self.index = 0
            raise StopIteration
        else:
            batches = self.dataset[self.index * self.batch_size: (self.index + 1) * self.batch_size]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches

    def __iter__(self):
        return self

    def __len__(self):
        if self.residue:
            return self.n_batches + 1
        else:
            return self.n_batches
        
class DevDatasetIterater(object):
    def __init__(self, dataset, batch_size, device):
        self.batch_size = batch_size
        self.dataset = dataset
        self.n_batches = len(dataset) // batch_size
        self.residue = False  # 记录batch数量是否为整数
        if len(dataset) % self.n_batches != 0:
            self.residue = True
        self.index = 0
        self.device = device

    def _to_tensor(self, datas):
        c_lens = [len(s[1]) for s in datas]
#         c_pad = torch.zeros(len(datas), max(c_lens)).long()
        c_pad = torch.zeros(len(datas), CT_MAX_LEN).long()
        for i, inp in enumerate(datas):
            c=inp[1]
            end = c_lens[i]
            c_pad[i, :end] = torch.LongTensor(c[end-1::-1])
            
        q_lens = [len(s[0]) for s in datas]
#         q_pad = torch.zeros(len(datas), max(q_lens)).long()
        q_pad = torch.zeros(len(datas), Q_MAX_LEN).long()
        for i, inp in enumerate(datas):
            q=inp[0]
            end = q_lens[i]
            q_pad[i, :end] = torch.LongTensor(q[end-1::-1])
        
        c=c_pad.to(self.device)
        q=q_pad.to(self.device)
        
#         print(datas)
#         q = torch.LongTensor([_[0] for _ in datas]).to(self.device)
#         c = torch.LongTensor([_[1] for _ in datas]).to(self.device)
 
        # pad前的长度(超过pad_size的设为pad_size)
        q_len = torch.LongTensor([_[2] for _ in datas]).to(self.device)
        c_len = torch.LongTensor([_[3] for _ in datas]).to(self.device)
        c_token=[_[4] for _ in datas]
        answer=[_[5] for _ in datas]
        id=[_[6] for _ in datas]
        return (q, c,q_len,c_len,c_token,answer,id)

    def __next__(self):
        if self.residue and self.index == self.n_batches:
            batches = self.dataset[self.index * self.batch_size: len(self.dataset)]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches

        elif self.index >= self.n_batches:
            self.index = 0
            raise StopIteration
        else:
            batches = self.dataset[self.index * self.batch_size: (self.index + 1) * self.batch_size]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches

    def __iter__(self):
        return self

    def __len__(self):
        if self.residue:
            return self.n_batches + 1
        else:
            return self.n_batches
        
def build_iterator(dataset,data_type,batch_size,device):
    if data_type=='train':
        iter = DatasetIterater(dataset, batch_size, device)
    else:
        iter = DevDatasetIterater(dataset, batch_size, device)

    return iter

In [7]:
#Batch构造函数的输入是src和trg，后者可以为None，因为再预测的时候是没有tgt的。
class Batch:
    def __init__(self, batch,data_type):
        if data_type=='train':
            self.pad=0
            self.q = batch[0]
            self.c=batch[1]
            self.q_mask = (batch[0] != self.pad)
            self.c_mask=(batch[1]!=self.pad)
            self.a=batch[2]
            self.a_mask=(batch[2]!=self.pad)
            self.q_len=batch[3]
            self.c_len=batch[4]
            self.a_len=batch[5]
            self.a_start=batch[6]
            self.a_end=batch[7]
            self.c_token=batch[8]
            self.id=batch[9]
            self.a_token=batch[10]
        else:
            self.pad=0
            self.q = batch[0]
            self.c=batch[1]
            self.q_mask = (batch[0] != self.pad)
            self.c_mask=(batch[1]!=self.pad)
            self.q_len=batch[2]
            self.c_len=batch[3]
            self.c_token=batch[4]
            self.answers=batch[5]
            self.id=batch[6]
            
         
    
def rebatch(batch,data_type):
    return Batch(batch,data_type)

# 定义模型

In [8]:

'''
在每两个子层之间都使用了残差连接(Residual Connection) 和归一化。
'''

# 归一化层
class LayerNorm(nn.Module):
    "构造一个 layernorm 模块 (See citation for details)."
    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2
    
 #为了能方便地使用这些残差连接，模型中所有的子层和Embedding层的输出都设定成了相同的维度，即d_model=512
# 封装残差连接和归一化，便于代码复用
class SublayerConnection(nn.Module):
    """
     LayerNorm + sublayer(Self-Attenion/Dense) + dropout + 残差连接
     为了简单，把LayerNorm放到了前面，这和原始论文稍有不同，原始论文LayerNorm在最后。
    """
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        #"sublayer是传入的参数，参考DecoderLayer，它可以当成函数调用，这个函数的有一个输入参数"
        return x + self.dropout(sublayer(self.norm(x)))
    
def clones(module, N):
    "生成N个完全一样的层."
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
def attention(query, key, value, mask=None, dropout=None):
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)

#     print('attention scores.size:',scores.size())
#     print('attention mask.size:',mask.size())
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
        
    p_attn = F.softmax(scores, dim = -1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn

class MultiHeadedAttention(nn.Module):
    def __init__(self, h, d_model, dropout=0.1):
        "Take in model size and number of heads."
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        # W我们假设d_v总是等于d_k
        self.d_k = d_model // h
        self.h = h
        self.linears = clones(nn.Linear(d_model, d_model), 4)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)
        
    def forward(self, query, key, value, mask=None):
        
        if mask is not None:
#             print('mask1.size:',mask.size())
            mask=mask.unsqueeze(-2)
#             print('mask2.size:',mask.size())
            # 所有h个head的mask都是相同的
            mask = mask.unsqueeze(1)
        nbatches = query.size(0)
        
        # 1) 首先使用线性变换，然后把d_model分配给h个Head，每个head为d_k=d_model/h 
        query, key, value = [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2) 
                             for l, x in zip(self.linears, (query, key, value))]
#         print('query.size:',query.size())
#         print('key.size:',key.size())
#         print('value.size:',value.size())
        # 2) 使用attention函数计算,x的shape是(batch, 8, time, 64)，而attn是(batch, 8, time, time)。
        x, self.attn = attention(query, key, value, mask=mask, 
                                 dropout=self.dropout)
        
        # 3) 把8个head的64维向量拼接成一个512的向量。然后再使用一个线性变换(512,521)，shape不变。 
        x = x.transpose(1, 2).contiguous().view(nbatches, -1, self.h * self.d_k)
        return self.linears[-1](x)

#全连接子层，全连接层的输入和输出都是d_model(512)维的，中间隐单元的个数是d_ff(2048)。
class PositionwiseFeedForward(nn.Module):
    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        #在两个线性变换之间除了ReLu还使用了一个Dropout。
        return self.w_2(self.dropout(F.relu(self.w_1(x))))
#Embeddings，这里除了使用Embedding外还* math.sqrt(self.d_model)
class Embeddings(nn.Module):
    def __init__(self, d_model, vocab):
        super(Embeddings, self).__init__()
        self.lut = nn.Embedding(vocab, d_model)
        self.d_model = d_model

    def forward(self, x):
        return self.lut(x) * math.sqrt(self.d_model)

#位置编码
# 假设输入是ID序列长度为10，如果输入Embedding之后是(10, 512)，那幺位置编码的输出也是(10, 512)。
# 上式中pos就是位置(0-9)，512维的偶数维使用sin函数，而奇数维使用cos函数。
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0., max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0., d_model, 2) *
                             -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        
        # register_buffer通常用于保存一些模型参数之外的值，它不是模型的参数(不用梯度下降)，但是模型会修改它，
        # 而且在预测的时候也要使用它。这里也是类似的，pe是一个提前计算好的常量，我们在forward要用到它。
        # 我们在构造函数里并没有把pe保存到self里，但是在forward的时候我们却可以直接使用它(self.pe)。
        self.register_buffer('pe', pe)
        
    def forward(self, x):
        x = x + Variable(self.pe[:, :x.size(1)], 
                         requires_grad=False)
        return self.dropout(x)

In [9]:
def make_model(src_vocab, tgt_vocab, N=6, 
               d_model=512, d_ff=2048, h=8, dropout=0.1):
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    
    # 随机初始化参数，这非常重要
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform(p)
    return model

In [10]:
#初始化lstm模型的参数
def init_lstm_wt(lstm):
    for names in lstm._all_weights:
        for name in names:
            if name.startswith('weight_'):
                wt = getattr(lstm, name)
                wt.data.uniform_(-0.02, 0.02)
            elif name.startswith('bias_'):
                # set forget bias to 1
                bias = getattr(lstm, name)
                n = bias.size(0)
                start, end = n // 4, n // 2
                bias.data.fill_(0.)
                bias.data[start:end].fill_(1.)

In [11]:
class EncoderLayer(nn.Module):
    def __init__(self, size, self_attn, feed_forward, dropout):
        super(EncoderLayer, self).__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.sublayer = clones(SublayerConnection(size, dropout), 2)
        self.size = size
  
    def forward(self, x, mask):
        # MultiAtention->Add&Norm
        x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, mask))
        
        # FeedForward-》Add&Norm
        return self.sublayer[1](x, self.feed_forward)

class Encoder(nn.Module):
    def __init__(self,bert,emb_dim,hidden_dim):
        super(Encoder, self).__init__()
        
        self.pre_model=bert
        self.hidden_dim=hidden_dim
        for param in self.pre_model.parameters():
            param.requires_grad = False
        
        self.lstm = nn.LSTM(1024, hidden_dim, num_layers=3,
                            bidirectional=True, batch_first=True)

        self.W_h = nn.Linear(hidden_dim * 2, hidden_dim * 2, bias=False)
        self.dropout=0.1
        self.c = copy.deepcopy
        self.attn = MultiHeadedAttention(8, hidden_dim)
        self.ff = PositionwiseFeedForward(hidden_dim, 2048,  self.dropout)
        self.position = PositionalEncoding(hidden_dim, self.dropout)
        self.linear=nn.Linear(1024,hidden_dim, bias=False)
        self.emb=nn.Sequential(self.linear, self.c(self.position))
        self.sublayer=clones(SublayerConnection(hidden_dim,  self.dropout), 2)
        self.N=6
        self.layer=EncoderLayer(hidden_dim, self.c(self.attn), self.c(self.ff), self.dropout)
        self.layers = clones(self.layer, self.N)
        self.norm = LayerNorm(self.layer.size)
        
    #seq_lens should be in descending order
    def forward(self, input_q, input_mask,input_lens):
        print('input_q.size:',input_q.size())
        encoder_out, text_cls = self.pre_model(input_q, attention_mask=input_mask)
        print('encoder_out11 size:',encoder_out.size())
        x = self.emb(encoder_out)
        print('x.size:',x.size())
        for i in self.layers:
            x = self.layer(x, input_mask)
         
        encoder_out=self.norm(x)
    
        print('encoder_out size:',encoder_out.size())
        #pack_padded_sequence 去除pad；lengths需要从大到小排序，batch_first如果设置为true，则x的第一维为batch_size，第二维为seq_length，否则相反。
#         encoder_outputs, hidden = self.lstm(encoder_out)
#         print('encoder_outs size:',encoder_outputs.size())

#         print(hidden[0].size(),hidden[1].size())

#         encoder_out = self.W_h(encoder_out)
        
        return encoder_out

In [12]:
class Decoder(nn.Module):
     def __init__(self,bert,emb_dim,hidden_dim):
        super(Encoder, self).__init__()
        
        self.pre_model=bert
        self.hidden_dim=hidden_dim
        for param in self.pre_model.parameters():
            param.requires_grad = True
        
        self.lstm = nn.LSTM(emb_dim, hidden_dim, num_layers=1, batch_first=True, bidirectional=True)
        
        init_lstm_wt(self.lstm)

        self.p_gen_linear = nn.Linear(hidden_dim * 4 + emb_dim, 1)
        self.x_context = nn.Linear(hidden_dim * 2 + emb_dim, emb_dim)
        self.ac_attention=ACAttention()
        self.ac_att_linear=nn.Linear()

        
     #seq_lens should be in descending order
     def forward(self, input_a, a_mask,input_lens,encoder_outputs, encoder_feature, enc_padding_mask,c_t_1,s_t,qc_attn):
        a_encoder_out, text_cls = self.pre_model(input_a, attention_mask=a_mask)

        #拼接decoder输入与（encoder输出attention后的值）
        x = self.x_context(torch.cat((c_t_1, a_encoder_out), 1))
        
        lstm_out, s_t = self.lstm(x.unsqueeze(1), s_t_1)
        h_decoder, c_decoder = s_t
        s_t_hat = torch.cat((h_decoder.view(-1, self.hidden_dim),
                             c_decoder.view(-1, self.hidden_dim)), 1)  # B x 2*hidden_dim
        c_t, attn_dist = self.attention_network(s_t_hat, encoder_outputs, encoder_feature,enc_padding_mask)
        qc_attn=qc_attn.sequeeze()
        
        

In [13]:
class PointerDecoder(nn.Module):
     def __init__(self,hidden_size,dropout):
        super(PointerDecoder, self).__init__()
        
        # 5. Modeling Layer
        self.modeling_LSTM1 = LSTM(input_size=hidden_size * 3, #*6
                                   hidden_size=hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=dropout)
        init_lstm_wt(self.modeling_LSTM1)


        self.modeling_LSTM2 = LSTM(input_size=hidden_size *2,
                                   hidden_size=hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=dropout)
        init_lstm_wt(self.modeling_LSTM2)


        # 6. Output Layer
        self.p1_weight_g = Linear(hidden_size * 3, 1) #*6
        self.p1_weight_m = Linear(hidden_size * 2 , 1)
        self.p2_weight_g = Linear(hidden_size * 3, 1) #*6
        self.p2_weight_m = Linear(hidden_size * 2, 1)
        
        self.output_LSTM = LSTM(input_size=hidden_size * 2,
                                hidden_size=hidden_size,
                                bidirectional=True,
                                batch_first=True,
                                dropout=dropout)
        init_lstm_wt(self.output_LSTM)


     #seq_lens should be in descending order
     def forward(self, attn,c_lens):
        m = self.modeling_LSTM2((self.modeling_LSTM1(attn)[0]))[0]
        # (batch, c_len)
        p1 = (self.p1_weight_g(attn) + self.p1_weight_m(m)).squeeze()
        # (batch, c_len, hidden_size * 2)
        m2 = self.output_LSTM(m)[0]
        # (batch, c_len)
        p2 = (self.p2_weight_g(attn) + self.p2_weight_m(m2)).squeeze()
        return p1, p2 
        
        

In [14]:
class QCAttention(nn.Module):
    def __init__(self,hidden_dim):
        super(QCAttention, self).__init__()
        self.att_weight_c = Linear(hidden_dim , 1)
        self.att_weight_q = Linear(hidden_dim , 1)
        self.att_weight_cq = Linear(hidden_dim , 1)

    def qc_attention(self,q_encoder_feature,c_encoder_feature,c_len,q_len,q_mask,c_mask):
        q_mask = q_mask.unsqueeze(1)
        c_mask = c_mask.unsqueeze(1)
        
        
        key=torch.cat([c_encoder_feature,q_encoder_feature],dim=1)
        query=key
        d_k = query.size(-1)
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)

        cq_attn = F.softmax(scores, dim = -1)        
        cq_attn=cq_attn[:,:c_len,c_len:]  #batch*c_len*q_len
        print('cq_attn',cq_attn.size())
        cq_attn = cq_attn.masked_fill(q_mask == 0, -1e9) # batch*c_len*q_len
        normalization_factor = cq_attn.sum(1, keepdim=True)
        cq_attn = cq_attn / normalization_factor
        
        #value:batch*q_len*(2*hidden)
        cq_weight=torch.matmul(cq_attn, q_encoder_feature)  #batch*c_len*(2*hidden)
        
        qc_attn = F.softmax(torch.max(cq_attn, dim=2)[0], dim=1).unsqueeze(1)
        print('qc_attn',qc_attn.size())
        
#         qc_attn = qc_attn.masked_fill(c_mask == 0, -1e9)
        # (batch, 1, c_len) * (batch, c_len, hidden_size * 2) -> (batch, hidden_size * 2)
        qc_weight = torch.bmm(qc_attn, c_encoder_feature).squeeze()
        # (batch, c_len, hidden_size * 2) (tiled)
        qc_weight = qc_weight.unsqueeze(1).expand(-1, c_len, -1)
        # p_attn=p_attn.permute(0,2,1)
        # p_attn= F.max_pool1d(p_attn,kernel_size=1,stride=1)

        
        
        x=torch.cat([qc_weight,cq_weight,c_encoder_feature],dim=-1)
        print('QCAttention x.size==',x.size())
        return x

    def att_flow_layer(self,q,c):
        """
          param c: (batch, c_len, hidden_size * 2)
          param q: (batch, q_len, hidden_size * 2)
          return: (batch, c_len, q_len)
        """
        c_len=c.size(1)
        q_len=q.size(1)
        cq=[]
        for i in range(q_len):
            qi=q.select(1,i).unsqueeze(1) #(batch,1,hidden_size*2)
            ci=self.att_weight_cq(c*qi).squeeze() # #(batch, c_len, 1)
            cq.append(ci)
        #(batch,c_len,q_len) 
        cq=torch.stack(cq,dim=-1)
        #(batch,c_len,q_len)
        s = self.att_weight_c(c).expand(-1, -1, q_len) + \
                self.att_weight_q(q).permute(0, 2, 1).expand(-1, c_len, -1) + \
                cq

        # (batch, c_len, q_len)
        a = F.softmax(s, dim=2)
        # (batch, c_len, q_len) * (batch, q_len, hidden_size * 2) -> (batch, c_len, hidden_size * 2)
        c2q_att = torch.bmm(a, q)
        # (batch, 1, c_len)
        b = F.softmax(torch.max(s, dim=2)[0], dim=1).unsqueeze(1)
        # (batch, 1, c_len) * (batch, c_len, hidden_size * 2) -> (batch, hidden_size * 2)
        q2c_att = torch.bmm(b, c).squeeze()
        # (batch, c_len, hidden_size * 2) (tiled)
        q2c_att = q2c_att.unsqueeze(1).expand(-1, c_len, -1)
        # q2c_att = torch.stack([q2c_att] * c_len, dim=1)

        # (batch, c_len, hidden_size * 8)
        x = torch.cat([c, c2q_att, c * c2q_att, c * q2c_att], dim=-1)
        return x
        
            
    
    def forward(self, q_encoder_feature, c_encoder_feature, q_mask, c_mask):
       
        x=self.qc_attention(q_encoder_feature,c_encoder_feature,c_encoder_feature.size(1),
                                           q_encoder_feature.size(1),q_mask,c_mask)
#         x=self.att_flow_layer(q_encoder_feature,c_encoder_feature)
        return  x

In [15]:
class PointerNetLoss(nn.Module):
    def __init__(self):
        super(PointerNetLoss, self).__init__()

    def forward(self, target, logits, lengths):
        """
        Args:
          target : label data (bz, tgt_max_len)
          logits : predicts (bz, tgt_max_len, src_max_len)
          lengths : length of label data (bz)
        """
        _, tgt_max_len = target.size()
        logits_flat = logits.view(-1, logits.size(-1))
        log_logits_flat = torch.log(logits_flat)
        target_flat = target.view(-1, 1)
        losses_flat = -torch.gather(log_logits_flat, dim=1, index = target_flat)
        losses = losses_flat.view(*target.size())
        mask = sequence_mask(lengths, tgt_max_len)
        mask = Variable(mask)
        losses = losses * mask.float()
        loss = losses.sum() / lengths.float().sum()
        return loss

In [16]:
class EncoderDecoder(nn.Module):
    def __init__(self,bert,hidden_dim,emb_dim,dropout):
        super(EncoderDecoder,self).__init__()
        
        self.encoder=Encoder(bert,emb_dim,hidden_dim)
        self.decoder=PointerDecoder(hidden_dim,dropout)
        self.attention=QCAttention(hidden_dim)
        self.hidden_dim=hidden_dim
        
    def forward(self,batch):
        # 题目encoder
        q_encoder_outputs=self.encoder(batch.q,batch.q_mask,batch.q_len)
    
        #文章encoder
        c_encoder_outputs=self.encoder(batch.c,batch.c_mask,batch.c_len)
    
        #题目对文章注意,文章对题目注意
#         x=self.attention(q_encoder_feature,c_encoder_feature,batch.q_mask,batch.c_mask)
        x=self.attention(q_encoder_outputs,c_encoder_outputs,batch.q_mask,batch.c_mask)


        p1,p2=self.decoder(x,batch.c_len)
        return p1,p2
    

In [17]:
class EMA():
    def __init__(self, mu):
        self.mu = mu
        self.shadow = {}

    def register(self, name, val):
        self.shadow[name] = val.clone()

    def get(self, name):
        return self.shadow[name]

    def update(self, name, x):
        assert name in self.shadow
        new_average = (1.0 - self.mu) * x + self.mu * self.shadow[name]
        self.shadow[name] = new_average.clone()

In [18]:
from collections import OrderedDict
import io
import json
import six
import sys
if six.PY2:
    reload(sys)
    sys.setdefaultencoding('utf8')
import argparse


def _tokenize_chinese_chars(text):
    """
    :param text: input text, unicode string
    :return:
        tokenized text, list
    """

    def _is_chinese_char(cp):
        """Checks whether CP is the codepoint of a CJK character."""
        # This defines a "chinese character" as anything in the CJK Unicode block:
        #     https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        #
        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
        # despite its name. The modern Korean Hangul alphabet is a different block,
        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
        # space-separated words, so they are not treated specially and handled
        # like the all of the other languages.
        if ((cp >= 0x4E00 and cp <= 0x9FFF) or  #
            (cp >= 0x3400 and cp <= 0x4DBF) or  #
            (cp >= 0x20000 and cp <= 0x2A6DF) or  #
            (cp >= 0x2A700 and cp <= 0x2B73F) or  #
            (cp >= 0x2B740 and cp <= 0x2B81F) or  #
            (cp >= 0x2B820 and cp <= 0x2CEAF) or
            (cp >= 0xF900 and cp <= 0xFAFF) or  #
            (cp >= 0x2F800 and cp <= 0x2FA1F)):  #
            return True

        return False

    output = []
    buff = ""
    for char in text:
        cp = ord(char)
        if _is_chinese_char(cp) or char == "=":
            if buff != "":
                output.append(buff)
                buff = ""
            output.append(char)
        else:
            buff += char

    if buff != "":
        output.append(buff)

    return output


def _normalize(in_str):
    """
    normalize the input unicode string
    """
    in_str = in_str.lower()
    sp_char = [
        u':', u'_', u'`', u'，', u'。', u'：', u'？', u'！', u'(', u')',
        u'“', u'”', u'；', u'’', u'《', u'》', u'……', u'·', u'、', u',',
        u'「', u'」', u'（', u'）', u'－', u'～', u'『', u'』', '|'
    ]
    out_segs = []
    for char in in_str:
        if char in sp_char:
            continue
        else:
            out_segs.append(char)
    return ''.join(out_segs)


def find_lcs(s1, s2):
    """find the longest common subsequence between s1 ans s2"""
    m = [[0 for i in range(len(s2)+1)] for j in range(len(s1)+1)]
    max_len = 0
    p = 0
    for i in range(len(s1)):
        for j in range(len(s2)):
            if s1[i] == s2[j]:
                m[i+1][j+1] = m[i][j]+1
                if m[i+1][j+1] > max_len:
                    max_len = m[i+1][j+1]
                    p = i+1
    return s1[p-max_len:p], max_len


def evaluate(ref_ans, pred_ans):
    """
    ref_ans: reference answers, dict
    pred_ans: predicted answer, dict
    return:
        f1_score: averaged F1 score
        em_score: averaged EM score
        total_count: number of samples in the reference dataset
        skip_count: number of samples skipped in the calculation due to unknown errors
    """
    f1 = 0
    em = 0
    total_count = 0
    skip_count = 0
    for answers,prediction in zip(ref_ans,pred_ans):
        answers=eval(str(answers))
        _f1 = calc_f1_score(answers, prediction)
        f1 += _f1
        em += calc_em_score(answers, prediction)
        total_count+=1
        print("origin: {}".format('#'.join(answers)))
        print("pred: {}".format(prediction))
        print("score: {}".format(_f1))
        print('----------------------------')

    f1_score = 100.0 * f1 / total_count
    em_score = 100.0 * em / total_count
    return f1_score, em_score, total_count


def calc_f1_score(answers, prediction):
    f1_scores = []
    for ans in answers:
        ans_segs = _tokenize_chinese_chars(_normalize(ans))
        prediction_segs = _tokenize_chinese_chars(_normalize(prediction))

        lcs, lcs_len = find_lcs(ans_segs, prediction_segs)
        if lcs_len == 0:
            f1_scores.append(0)
            continue
        prec = 1.0*lcs_len/len(prediction_segs)
        rec = 1.0*lcs_len/len(ans_segs)
        f1 = (2 * prec * rec) / (prec + rec)
        f1_scores.append(f1)
    return max(f1_scores)


def calc_em_score(answers, prediction):
    em = 0
    for ans in answers:
        ans_ = _normalize(ans)
        prediction_ = _normalize(prediction)
        if ans_ == prediction_:
            em = 1
            break
    return em

In [19]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [20]:
def test(model, ema, data_iter,criterion):
    loss = 0
    answers = dict()
    model.eval()
    ref_ans=[]
    pred_ans=[]
    backup_params = EMA(0)
    for name, param in model.named_parameters():
        if param.requires_grad:
            backup_params.register(name, param.data)
            param.data.copy_(ema.get(name))

    with torch.set_grad_enabled(False):
        for i, batch in enumerate(data_iter):
            batch=rebatch(batch,'dev')
            p1, p2 =  model.forward(batch)
#             print('p1.size:',p1.size())
#             print('p2.size:',p2.size())
#             print('p1:',p1)
#             print('p1:',p2)
#             batch_loss = criterion(p1, batch.a_start) + criterion(p2, batch.a_end)
#             loss += batch_loss.item()

            # (batch, c_len, c_len)
           
            batch_size, c_len = p1.size()
            ls = nn.LogSoftmax(dim=1)
            mask = (torch.ones(c_len, c_len) * float('-inf')).to(1).tril(-1).unsqueeze(0).expand(batch_size, -1, -1)
            score = (ls(p1).unsqueeze(2) + ls(p2).unsqueeze(1)) + mask
            score, s_idx = score.max(dim=1)
            score, e_idx = score.max(dim=1)
            print('s_idx:',s_idx)
            print('e_idx:',e_idx)
            s_idx = torch.gather(s_idx, 1, e_idx.view(-1, 1)).squeeze()
#             print('s_idx:',s_idx)
            for i in range(batch_size):
#                 tokenizer.convert_tokens_to_ids(batch.a_token)
                id = batch.id[i]
                answer = batch.c_token[i][s_idx[i]:e_idx[i]]
                
                answer = ''.join(answer)     
                pred_ans.append(answer)
                ref_ans.append(batch.answers[i])
                print('pre answer is :',answer)
                print('batch.answers is:',batch.answers[i])
                
                answers[id] = answer

        for name, param in model.named_parameters():
            if param.requires_grad:
                param.data.copy_(backup_params.get(name))

    with open('predict_result.txt', 'w', encoding='utf-8') as f:
        print(json.dumps(answers), file=f)

    f1_score, em_score, total_count = evaluate(ref_ans, pred_ans)
    return loss, f1_score, em_score


In [21]:
# "标准的训练和记录函数"
def run_epoch(data_iter, model,optimizer,criterion,ema,loss):
    start = time.time()
    total_tokens = 0
    total_loss = 0
    tokens = 0
    
    for i, batch in enumerate(data_iter):
        batch=rebatch(batch,'train')
        p1,p2 = model.forward(batch)
        optimizer.zero_grad()
        
        batch_loss = criterion(p1, batch.a_start) + criterion(p2, batch.a_end)
        loss += batch_loss.item()
        batch_loss.backward()
        optimizer.step()
        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.update(name, param.data)          
    return loss

train_dataset,dev_dataset=build_dataset()
print(len(train_dataset),len(dev_dataset))

hidden_dim=256
emb_dim=128    
dropout=0.5
def train():
    learning_rate=0.005
    model=EncoderDecoder(bert_model,hidden_dim,emb_dim,dropout)
    model.to(1)
    ema = EMA(0.999)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    loss=0
    em_dev_exact, max_dev_f1 = -1, -1
    i=0
    for epoch in range(10):
        train_iter=build_iterator(train_dataset,'train',BATCH_SIZE,1)
        dev_iter=build_iterator(dev_dataset,'dev',BATCH_SIZE,1)

        loss=run_epoch(train_iter,model,optimizer,criterion,ema,loss)
        
        i+=1
        print("Epoch Step: %d Loss: %s " % (i, str(loss)))
        eval_loss, f1_score, em_score=test(model, ema, dev_iter,criterion) 
        print('eval_loss:',eval_loss)
        print('f1_score:',f1_score)
        print('em_score:',em_score)
        if f1_score > max_dev_f1:
            max_dev_f1 = f1_score
            max_dev_exact = em_score
            best_model = copy.deepcopy(model)

        loss = 0
        model.train()
        
    torch.save(best_model.state_dict(), f'./best_model.pt')
    

11622 1143


In [22]:
train()

  "num_layers={}".format(dropout, num_layers))


input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100,

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  8, 11, 11, 10, 11,  6, 11,  7,  8, 10,  9,  6, 10, 11,  9],
       device='cuda:1')
pre answer is : 建议你分组进行,这也是
batch.answers is: ['3~4组', '3~4']
pre answer is : 下载文件:哒哒网
batch.answers is: ['哒哒网游加速器3.0.rar', '哒哒网游加速器']
pre answer is : 《我想》歌手:董昱昆歌
batch.answers is: ['我想']
pre answer is : 赵亮是文职干部,没有军
batch.answers is: ['没有军衔']
pre answer is : 一般科目三考完后隔天
batch.answers is: ['隔天就可以', '隔天']
pre answer is : 理肤泉适合成年人使用。
batch.answers is: ['成年人']
pre answer is : 北京时间4月
batch.answers is: ['2017年6月23

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([12,  9, 12, 10, 10,  8, 10,  9, 11, 11, 10, 10,  6, 11, 11,  9],
       device='cuda:1')
pre answer is : 《人民的名义》是由最高人
batch.answers is: ['预计于2017年年初', '2017年年初']
pre answer is : 一、2.5平方电线
batch.answers is: ['3500~5500W']
pre answer is : 而这次评测的主要载体映泰
batch.answers is: ['映泰Z270GT6']
pre answer is : 推荐一下qq浏览器吧!
batch.answers is: ['QQ浏览器']
pre answer is : 一共有三个三十岁上下
batch.answers is: ['三个']
pre answer is : 日前,oppo官微正式
batch.answers is: ['6月10日']
pre answer is : 经搜索,侯明昊出演的
batch.answers is: ['20160109期']
pre answer is : 赵又廷官方给出的身
batch.answers is: ['最多也就177']
pre answer is : 人工智障爱

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 7, 7, 7]], device='cuda:1')
e_idx: tensor([10, 12, 10, 11, 11, 11,  7, 10, 11,  5,  6, 11, 11,  5, 10, 11],
       device='cuda:1')
pre answer is : 正常的子宫约是7##×##5
batch.answers is: ['约是7×5×3cm，容量约5ml', '7×5×3cm，容量约5ml', '长7-8cm，宽4-5cm，厚2-3cm，容量约5ml']
pre answer is : 您好，中公教育为您服务。
batch.answers is: ['大专及其以上的学历']
pre answer is : 蚕从由孵化开始要经过
batch.answers is: ['约十天', '十天']
pre answer is : 点儿童套餐，主食（5种
batch.answers is: ['23到28之间', '23到28']
pre answer is : 您好！一般是从

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 9, 10, 10, 10, 11, 10,  9, 12, 12, 11,  9, 11,  9, 10, 11,  6],
       device='cuda:1')
pre answer is : 据林语堂先生《武则
batch.answers is: ['九十三人（不包括其受到株连的亲属)']
pre answer is : 早期梅毒4##～6周能治愈
batch.answers is: ['早期梅毒4～6周']
pre answer is : 2017年头伏是公历2017年7
batch.answers is: ['公历2017年7月12日', '公历2017-7-12']
pre answer is : 你先查看下医院的资质
batch.answers is: ['5000以上']
pre answer is : 45-59岁为中年,45岁以
batch.answers is: ['45-59岁']
pre answer is : 成年人有80，000-100，
batch.answers is: ['平均一天长0.03-0.04厘米']
pre answer is

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 10,  9, 11, 11, 11, 10, 11,  9,  7, 10, 10, 10,  9, 10, 11],
       device='cuda:1')
pre answer is : 叙利亚内部，主要是政
batch.answers is: ['三']
pre answer is : 每个星期二吧。这款游
batch.answers is: ['星期二']
pre answer is : 我们姑且按照人品差
batch.answers is: ['15000左右', '15000']
pre answer is : 阿胶又名盆覆胶、驴皮胶
batch.answers is: ['驴皮']
pre answer is : 有限责任公司，是指根据
batch.answers is: ['有限责任公司']
pre answer is : 没得比，《人民的名义》
batch.answers is: ['破一']
pre answer is : 一个身份证可以实名认
batch.answers is: ['6个']
pre answer is : 1949年10月1日，在解放军
batch.answers is: ['1951年']
pre answer is : 最低额度3万起批。
batch.answers is: ['3万起

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 9,  7,  7, 12, 10, 10, 10, 11, 11,  9, 10, 10, 11, 11,  9,  8],
       device='cuda:1')
pre answer is : 大家应该都知道,潘
batch.answers is: ['三', '三段婚姻']
pre answer is : 1美元=6.205
batch.answers is: ['1美元=6.2053人民币']
pre answer is : 微信小程序是一
batch.answers is: ['不需要下载安装即可使用的应用']
pre answer is : 全国免费客服热线:(400##8
batch.answers is: ['4008-400-301']
pre answer is : i=n##q##vs。i:一段
batch.answers is: ['I=nqvs']
pre answer is : 关羽的

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 9, 10,  9,  9,  9,  9,  9,  9,  7, 10,  9,  9, 11,  5,  9, 10],
       device='cuda:1')
pre answer is : 打胎不是小事还是要
batch.answers is: ['博爱']
pre answer is : 20兆宽带下载速度是2
batch.answers is: ['2.5MB/秒', '2400KB/S']
pre answer is : 减脂期间，低碳水的
batch.answers is: ['一个拳头大小']
pre answer is : 现在很贵了，基本都
batch.answers is: ['100+']
pre answer is : 哥以前到那里去玩,
batch.answers is: ['本科第二批', '2A', '本科第二批(2A)']
pre answer is : 应付账款周转率=年
batch.answers is: ['应付账款周转天数=360/应付账款周转率']
pre answer is : 烘干机作为一个新兴
batch.answers

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 10, 11, 10,  9,  9, 10,  8, 10, 11, 10,  9, 10, 10, 10, 10],
       device='cuda:1')
pre answer is : 大家都知道,手足口病
batch.answers is: ['6月龄']
pre answer is : 80年的价格分别是：一
batch.answers is: ['100']
pre answer is : 招行有东航联名信用卡，
batch.answers is: ['招行']
pre answer is : 阿哲的男徒弟,阿钊。
batch.answers is: ['阿钊']
pre answer is : 圆的面积最大；正方
batch.answers is: ['圆的面积最大', '圆']
pre answer is : 中国共产党,简称中
batch.answers is: ['1921年7月']
pre answer is : 阳历是2月19日,所以
batch.answers is: ['双鱼座']
pre answer is : 开发微信公众平台
batch.answers is: ['3000-万元']
pre answer is : 九寨沟位于四川省阿坝
batch.answers is: ['四川省阿坝藏族羌族

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 7, 7, 7],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11, 11, 10, 10,  9,  8, 12,  9,  6,  8,  7,  6,  7, 10,  9,  9],
       device='cuda:1')
pre answer is : 力，阅读
batch.answers is: ['最少需要2-3个月', '2-3个月']
pre answer is : cg##t绿城拥有南北两大生
batch.answers is: ['行业的最前端']
pre answer is : 我2011年5月末做的双眼
batch.answers is: ['炫美整形', '米扬整形']
pre answer is : 基本信息栏英文名:po
batch.answers is: ['1000-7000元']
pre answer is : 云烟(冬虫夏草和润
batch.answers is: ['60元', '每包才60元钱']
pre answer is : 五星

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11, 11, 10,  6, 10, 10,  7, 10, 10, 10, 10, 11,  6,  7,  7, 10],
       device='cuda:1')
pre answer is : 你好！今年的审计费用标
batch.answers is: ['4000元']
pre answer is : 《劳动法》第21条规定,
batch.answers is: ['6个月']
pre answer is : 无缝管长度在3000~12000##mm
batch.answers is: ['6米']
pre answer is : 正确的热车方
batch.answers is: ['发动后30秒至一分钟后']
pre answer is : 导读:近日,《极限挑
batch.answers is: ['2017年7月2日', '2017年7月2日每周日晚21:00']
pre answer is : 神奇女侠（w##ond##erw##oman
batch.answers is: ['神奇女侠']
pre answer is : 根据腾讯公司规
batch.ans

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 10, 10,  6, 11,  7,  9, 10, 11, 10,  5, 12,  7,  9,  9, 11],
       device='cuda:1')
pre answer is : 首艘国产航母正式下水
batch.answers is: ['2017年4月26日上午', '2017年4月26日']
pre answer is : 要想知道种植牙的价格
batch.answers is: ['几千到上万不等', '几千到上万']
pre answer is : 个人认为从第一部到第
batch.answers is: ['黑旗', '第2部', '3代']
pre answer is : 最大可以搭配
batch.answers is: ['酷睿2四核 Q8300']
pre answer is : 平时多喝点绿豆汤，不要
batch.answers is: ['重庆【蓝天】医院']
pre answer is : 是的！杏花如雨
batch.answers is: ['3月18日 14:00', '3月18日']
pre answer is : 1983年迁至北京市东南
batch.answers is: ['中国医学科学院肿瘤医院肿瘤研究所']
pre answer is : 大约5000多，需要自己有
batch

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10,  9, 12, 10, 10, 10,  9, 10, 11,  8,  9,  9,  6, 10,  9, 11],
       device='cuda:1')
pre answer is : 蚂蚁花呗的账单日为每
batch.answers is: ['确认收货后的下月1号—9号(或10号)']
pre answer is : 青少年该如何远离白
batch.answers is: ['太原龙城中医白癜风医院']
pre answer is : 毛利兰，虽然灰原哀很有个
batch.answers is: ['毛利兰']
pre answer is : 首先玩家需要挑选一对
batch.answers is: ['怀孕3个小时之后']
pre answer is : 通常整个check时间需要2
batch.answers is: ['2-3周']
pre answer is : 目前活动期间开通年费
ba

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  6,  7,  6, 11,  8, 10, 11,  6, 11, 10, 11, 10,  7, 10, 10],
       device='cuda:1')
pre answer is : 性价比要看你所选钢琴的
batch.answers is: ['欧歌钢琴城']
pre answer is : 南瓜头稻草人
batch.answers is: ['一个']
pre answer is : 今年中考全省统
batch.answers is: ['6月23日-25日']
pre answer is : 形式上的英语
batch.answers is: ['formal; formally; in form']
pre answer is : 邓霓仪是已故富豪邓肇坚
batch.answers is: ['邓肇坚爵士的孙女', '邓永锵的胞妹']
pre answer is : session的超时时间设置
batch.answers is: ['30分钟']
pre answer is : 如果是窄口铅笔裙的话
batch.answers is: ['高腰与迷你

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11, 11, 10,  9,  9, 10,  9, 10, 10, 10, 11, 10, 10, 10,  7, 10],
       device='cuda:1')
pre answer is : 黑袍的身份，在周园的时
batch.answers is: ['周独夫的妹妹']
pre answer is : 一、团员年满28周岁，如
batch.answers is: ['28周岁']
pre answer is : 现代的suv就那么几款，
batch.answers is: ['名图']
pre answer is : 从1992年10月起,第一
batch.answers is: ['4月1日']
pre answer is : 2017年6月英语四级考
batch.answers is: ['6月17日']
pre answer is : 椅子的名称始见于唐代
batch.answers is: ['汉魏', '唐代']
pre answer is : 去杭州胤隆会啊,本
batch.answers is: ['杭州胤隆会', '胤隆会']
pre answer is : 相对来说厨师工资是不
batch.answers is: ['厨师']
pre answer is : [UNK]七坐八爬[UNK]象征孩子
batch.

origin: 亦庄工业园站下车即到#亦庄工业园站#北京大兴区亦庄经济开发区经海三路
pred: 乘车路线:乘820、98##6
score: 0.125
----------------------------
origin: 今年年中
pred: 据小编从宝马汽车官方了
score: 0
----------------------------
origin: 贾迎春的丫头
pred: 司棋[1],中国古典小
score: 0
----------------------------
origin: 移动WLAN的附加产品
pred: 的,和
score: 0.2
----------------------------
origin: 238元
pred: 小皙护肤品补水效果
score: 0
----------------------------
origin: 880万美元
pred: 比
score: 0
----------------------------
origin: 龙胆泻肝丸
pred: 病情分析:您好,您的
score: 0
----------------------------
origin: 严晓秋
pred: 一向热心善良的唐明(
score: 0
----------------------------
origin: 冬天
pred: 冬天很不错。可以给你推
score: 0.33333333333333337
----------------------------
origin: 16岁
pred: 女生练习跆拳道的最佳
score: 0
----------------------------
origin: 95511
pred: 平安银行95##51##1电话按
score: 0
----------------------------
origin: 今年8月份
pred: 近日,玩车之
score: 0
----------------------------
origin: 处女座#处女
pred: 他可能会让你等待很
score: 0
----------------------------
origin: 400-167-6796
pred: 南京志高空调售后)[UNK]
score: 0
-------

pred: 小编的闺中好友花1200抽
score: 0
----------------------------
origin: 1匹#1匹空调
pred: 家庭选用空调的时候
score: 0.30769230769230765
----------------------------
origin: 1月13日-16日
pred: 1.2017年上半年教师资
score: 0
----------------------------
origin: 骨科
pred: 腰痛是以腰部一侧或两侧
score: 0
----------------------------
origin: 10几个
pred: 病情分析:你好，你的
score: 0
----------------------------
origin: 绿茸线蛇
pred: ;世界上最长寿的动物不
score: 0
----------------------------
origin: 傅博文
pred: 其实外科风云剧情介绍
score: 0
----------------------------
origin: 预计在明年#明年
pred: 时隔八年幻想三国志再出
score: 0.15384615384615385
----------------------------
origin: 15岁
pred: 张丹峰儿子张浩锋，今
score: 0
----------------------------
origin: 大概在200元到1000元#200元到1000元
pred: 计算和确定一个衣柜的价钱
score: 0
----------------------------
origin: 10010
pred: 人工客服热线的电
score: 0
----------------------------
origin: 上午
pred: 喝绿茶最好的时间就是
score: 0
----------------------------
origin: 乌鲁木齐市的华凌市场
pred: 乌鲁木齐市的华凌市场
score: 1.0
----------------------------
origin: 一般在几百至千元不等#一般在几百至千元#几百至千元#几百至千元不等
pred: 包皮

input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100,

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  8,  6, 10, 10, 11,  6,  6, 10,  5,  9,  9,  6, 10, 10,  9],
       device='cuda:1')
pre answer is : 建议你分组进行,这也是
batch.answers is: ['3~4组', '3~4']
pre answer is : 下载文件:哒哒网
batch.answers is: ['哒哒网游加速器3.0.rar', '哒哒网游加速器']
pre answer is : 《我想》歌手
batch.answers is: ['我想']
pre answer is : 赵亮是文职干部,没有
batch.answers is: ['没有军衔']
pre answer is : 一般科目三考完后隔天
batch.answers is: ['隔天就可以', '隔天']
pre answer is : 理肤泉适合成年人使用。
batch.answers is: ['成年人']
pre answer is : 北京时间4月
batch.answers is: ['2017年6月23日']
pr

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  9, 10, 10, 10,  9, 10,  9,  9, 11, 10, 10,  6,  5, 11, 10],
       device='cuda:1')
pre answer is : 《人民的名义》是由最高
batch.answers is: ['预计于2017年年初', '2017年年初']
pre answer is : 一、2.5平方电线
batch.answers is: ['3500~5500W']
pre answer is : 而这次评测的主要载体
batch.answers is: ['映泰Z270GT6']
pre answer is : 推荐一下qq浏览器吧!
batch.answers is: ['QQ浏览器']
pre answer is : 一共有三个三十岁上下
batch.answers is: ['三个']
pre answer is : 日前,oppo官微正式宣
batch.answers is: ['6月10日']
pre answer is : 经搜索,侯明昊出演的
batch.answers is: ['20160109期']
pre answer is : 赵又廷官方给出的身
batch.answers is: ['最多也就177']
pre answer is : 人工智障爱酱,

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 12, 10, 11, 11, 11,  7, 10, 10, 11, 11, 11, 11,  5, 10, 11],
       device='cuda:1')
pre answer is : 正常的子宫约是7##×##5
batch.answers is: ['约是7×5×3cm，容量约5ml', '7×5×3cm，容量约5ml', '长7-8cm，宽4-5cm，厚2-3cm，容量约5ml']
pre answer is : 您好，中公教育为您服务。
batch.answers is: ['大专及其以上的学历']
pre answer is : 蚕从由孵化开始要经过
batch.answers is: ['约十天', '十天']
pre answer is : 点儿童套餐，主食（5种
batch.answers is: ['23到28之间', '23到28']
pre answer is : 您好！一般是从

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 9, 10, 10, 10, 11, 10,  9, 11, 11,  6, 12, 11, 11, 10, 10,  6],
       device='cuda:1')
pre answer is : 据林语堂先生《武则
batch.answers is: ['九十三人（不包括其受到株连的亲属)']
pre answer is : 早期梅毒4##～6周能治愈
batch.answers is: ['早期梅毒4～6周']
pre answer is : 2017年头伏是公历2017年7
batch.answers is: ['公历2017年7月12日', '公历2017-7-12']
pre answer is : 你先查看下医院的资质
batch.answers is: ['5000以上']
pre answer is : 45-59岁为中年,45岁以
batch.answers is: ['45-59岁']
pre answer is : 成年人有80，000-100，
batch.answers is: ['平均一天长0.03-0.04厘米']
pre answer is

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 9, 10,  9,  6, 11, 10, 11, 10,  8,  7,  9,  9, 10,  9, 10, 10],
       device='cuda:1')
pre answer is : 叙利亚内部，主要是
batch.answers is: ['三']
pre answer is : 每个星期二吧。这款游
batch.answers is: ['星期二']
pre answer is : 我们姑且按照人品差
batch.answers is: ['15000左右', '15000']
pre answer is : 阿胶又名盆覆
batch.answers is: ['驴皮']
pre answer is : 有限责任公司，是指根据
batch.answers is: ['有限责任公司']
pre answer is : 没得比，《人民的名义
batch.answers is: ['破一']
pre answer is : 一个身份证可以实名认证
batch.answers is: ['6个']
pre answer is : 1949年10月1日，在解放
batch.answers is: ['1951年']
pre answer is : 最低额度3万起批
batch.answers is: ['3万起批']
pre 

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10,  7,  7, 12, 10, 10, 10,  9, 10,  9,  9,  5, 11,  8,  9,  8],
       device='cuda:1')
pre answer is : 大家应该都知道,潘雨
batch.answers is: ['三', '三段婚姻']
pre answer is : 1美元=6.205
batch.answers is: ['1美元=6.2053人民币']
pre answer is : 微信小程序是一
batch.answers is: ['不需要下载安装即可使用的应用']
pre answer is : 全国免费客服热线:(400##8
batch.answers is: ['4008-400-30

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 5, 11, 10,  8,  9,  9,  9, 10,  5, 10,  9,  9, 11,  6,  9, 10],
       device='cuda:1')
pre answer is : 打胎不是小
batch.answers is: ['博爱']
pre answer is : 20兆宽带下载速度是2.
batch.answers is: ['2.5MB/秒', '2400KB/S']
pre answer is : 减脂期间，低碳水的饮
batch.answers is: ['一个拳头大小']
pre answer is : 现在很贵了，基本
batch.answers is: ['100+']
pre answer is : 哥以前到那里去玩,
batch.answers is: ['本科第二批', '2A', '本科第二批(2A)']
pre answer is : 应付账款周转率=年
batch.answers is: ['应付账款周转天数=360/应付账款周转率']
pre answer is : 烘干机作为一个新兴
batch.answers is

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 5, 10, 11, 10,  9,  9, 10,  6, 10, 11, 10, 10, 10, 10, 10, 10],
       device='cuda:1')
pre answer is : 大家都知道
batch.answers is: ['6月龄']
pre answer is : 80年的价格分别是：一
batch.answers is: ['100']
pre answer is : 招行有东航联名信用卡，
batch.answers is: ['招行']
pre answer is : 阿哲的男徒弟,阿钊。
batch.answers is: ['阿钊']
pre answer is : 圆的面积最大；正方
batch.answers is: ['圆的面积最大', '圆']
pre answer is : 中国共产党,简称中
batch.answers is: ['1921年7月']
pre answer is : 阳历是2月19日,所以
batch.answers is: ['双鱼座']
pre answer is : 开发微信公众
batch.answers is: ['3000-万元']
pre answer is : 九寨沟位于四川省阿坝
batch.answers is: ['四川省阿坝藏族羌族自治州九寨沟县

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 1, 2,  ..., 7, 7, 7],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  8, 10, 10,  9, 10, 12,  9,  6,  6,  7,  6,  7, 10,  9, 10],
       device='cuda:1')
pre answer is : 力，阅读
batch.answers is: ['最少需要2-3个月', '2-3个月']
pre answer is : cg##t绿城拥有南北
batch.answers is: ['行业的最前端']
pre answer is : 我2011年5月末做的双眼
batch.answers is: ['炫美整形', '米扬整形']
pre answer is : 基本信息栏英文名:po
batch.answers is: ['1000-7000元']
pre answer is : 云烟(冬虫夏草和润
batch.answers is: ['60元', '每包才60元钱']
pre answer is : 五星级酒店

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11, 11, 10, 10, 10,  6,  7, 10, 10, 10, 10, 11,  6,  7,  9, 10],
       device='cuda:1')
pre answer is : 你好！今年的审计费用标
batch.answers is: ['4000元']
pre answer is : 《劳动法》第21条规定,
batch.answers is: ['6个月']
pre answer is : 无缝管长度在3000~12000##mm
batch.answers is: ['6米']
pre answer is : 正确的热车方法应该是
batch.answers is: ['发动后30秒至一分钟后']
pre answer is : 导读:近日,《极限挑
batch.answers is: ['2017年7月2日', '2017年7月2日每周日晚21:00']
pre answer is : 神奇女侠（w
batch.answers is: ['神奇女侠']
pre answer is : 根据腾讯公司规
batch.answers is: ['5

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 10, 11,  6,  9,  7,  9, 10, 11,  5,  5, 12,  7,  6,  9, 11],
       device='cuda:1')
pre answer is : 首艘国产航母正式下水
batch.answers is: ['2017年4月26日上午', '2017年4月26日']
pre answer is : 要想知道种植牙的价格
batch.answers is: ['几千到上万不等', '几千到上万']
pre answer is : 个人认为从第一部到第三
batch.answers is: ['黑旗', '第2部', '3代']
pre answer is : 最大可以搭配
batch.answers is: ['酷睿2四核 Q8300']
pre answer is : 平时多喝点绿豆汤，
batch.answers is: ['重庆【蓝天】医院']
pre answer is : 是的！杏花如雨
batch.answers is: ['3月18日 14:00', '3月18日']
pre answer is : 1983年迁至北京市东南
batch.answers is: ['中国医学科学院肿瘤医院肿瘤研究所']
pre answer is : 大约5000多，需要自己有
batch.

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10,  5,  9, 10, 10,  8,  9, 10, 11,  8,  9, 11,  6,  8,  8, 11],
       device='cuda:1')
pre answer is : 蚂蚁花呗的账单日为每
batch.answers is: ['确认收货后的下月1号—9号(或10号)']
pre answer is : 青少年该如
batch.answers is: ['太原龙城中医白癜风医院']
pre answer is : 毛利兰，虽然灰原哀
batch.answers is: ['毛利兰']
pre answer is : 首先玩家需要挑选一对
batch.answers is: ['怀孕3个小时之后']
pre answer is : 通常整个check时间需要2
batch.answers is: ['2-3周']
pre answer is : 目前活动期间开通
batch.answe

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 6,  6, 10,  8, 11,  6, 11, 10,  6,  8, 10, 11, 10,  7, 10, 10],
       device='cuda:1')
pre answer is : 性价比要看你
batch.answers is: ['欧歌钢琴城']
pre answer is : 南瓜头稻草人
batch.answers is: ['一个']
pre answer is : 今年中考全省统考,备
batch.answers is: ['6月23日-25日']
pre answer is : 形式上的英语翻译
batch.answers is: ['formal; formally; in form']
pre answer is : 邓霓仪是已故富豪邓肇坚
batch.answers is: ['邓肇坚爵士的孙女', '邓永锵的胞妹']
pre answer is : session的超时时间
batch.answers is: ['30分钟']
pre answer is : 如果是窄口铅笔裙的话，
batch.answers is: ['高腰与迷你还

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 5, 11, 10,  7,  9, 10,  9, 10,  6, 10,  8, 10, 10, 10,  6, 10],
       device='cuda:1')
pre answer is : 黑袍的身份
batch.answers is: ['周独夫的妹妹']
pre answer is : 一、团员年满28周岁，如
batch.answers is: ['28周岁']
pre answer is : 现代的suv就那么几款，
batch.answers is: ['名图']
pre answer is : 从1992年10月起,
batch.answers is: ['4月1日']
pre answer is : 2017年6月英语四级考
batch.answers is: ['6月17日']
pre answer is : 椅子的名称始见于唐代
batch.answers is: ['汉魏', '唐代']
pre answer is : 去杭州胤隆会啊,本
batch.answers is: ['杭州胤隆会', '胤隆会']
pre answer is : 相对来说厨师工资是不
batch.answers is: ['厨师']
pre answer is : [UNK]七坐八爬[UNK]
batch.answers is: 

----------------------------
origin: 马来西亚的货币林吉特#马来西亚的货币
pred: rm是马来西亚
score: 0.6153846153846153
----------------------------
origin: 物理内存的2倍#物理内存的2倍左右
pred: 虚拟内存设置大小一般
score: 0.23529411764705882
----------------------------
origin: 20世纪90年代
pred: 现代远程教育是相对于
score: 0.125
----------------------------
origin: 12个月
pred: 根据国务院《女职工劳
score: 0
----------------------------
origin: 22
pred: 二号字体对应的
score: 0
----------------------------
origin: 二百块以上
pred: 二百块以上，如果是实
score: 0.7142857142857143
----------------------------
origin: 超过133umol/L
pred: 你好，血清肌酐的浓
score: 0
----------------------------
origin: 三本
pred: 四川大学锦江学院（si
score: 0
----------------------------
origin: 新的记分周期
pred: 当事人的驾驶证分数
score: 0.13333333333333333
----------------------------
origin: 12.1.2
pred: 目前itunes最新版本的型
score: 0
----------------------------
origin: 信越的X-23-7783-D#7783
pred: 日本信越的x-23-
score: 0.6
----------------------------
origin: 卡尔的萌宠庄园
pred: 卡尔的萌宠庄园！营
score: 0.9333333333333333
----------------------------
origin: 三

----------------------------
origin: 8月中旬
pred: 2017年一级消防工程师考
score: 0
----------------------------
origin: 四#四次
pred: 1，现在广东还可
score: 0
----------------------------
origin: 4008899315
pred: 感谢您对美的空调的关
score: 0
----------------------------
origin: vivoX9#三星6
pred: oppo##r##9##s，拍照功能
score: 0
----------------------------
origin: 停产前价格一般卖到12000左右#停产前价格一般卖到12000
pred: 雅马哈劲虎已停产多年
score: 0.20000000000000004
----------------------------
origin: 0.06克#0.3克拉
pred: 一克拉也就是0.
score: 0.4
----------------------------
origin: 加10以上
pred: dnf强化无论什么装备,
score: 0
----------------------------
origin: 实况8
pred: 实况8个人觉得是最经
score: 0.4615384615384615
----------------------------
origin: 大概130-180#130-180
pred: 室内木塑地板现在价
score: 0
----------------------------
origin: 地图最东南
pred: 星露谷物语star##de##w##val##ley
score: 0
----------------------------
origin: 728,389,000
pred: 据2011年的统计数
score: 0
----------------------------
origin: 1997年以后
pred: 你好同学，
score: 0
----------------------------
origin: 怀孕四十九天之内
pred: 您好,在怀孕

input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100,

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 7, 7, 7],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  8,  5, 10, 10, 11, 10,  6, 10,  8,  5,  9,  8, 10, 11,  9],
       device='cuda:1')
pre answer is : 建议你分组进行,这也是
batch.answers is: ['3~4组', '3~4']
pre answer is : 下载文件:哒哒网
batch.answers is: ['哒哒网游加速器3.0.rar', '哒哒网游加速器']
pre answer is : 《我想》歌
batch.answers is: ['我想']
pre answer is : 赵亮是文职干部,没有
batch.answers is: ['没有军衔']
pre answer is : 一般科目三考完后隔天
batch.answers is: ['隔天就可以', '隔天']
pre answer is : 理肤泉适合成年人使用。
batch.answers is: ['成年人']
pre answer is : 北京时间4月21日消息
batch.answers is: ['2017年6月23日'

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  9, 10,  7, 10,  9, 10,  9,  9, 11, 11, 10, 10,  5, 11, 10],
       device='cuda:1')
pre answer is : 《人民的名义》是由最高
batch.answers is: ['预计于2017年年初', '2017年年初']
pre answer is : 一、2.5平方电线
batch.answers is: ['3500~5500W']
pre answer is : 而这次评测的主要载体
batch.answers is: ['映泰Z270GT6']
pre answer is : 推荐一下qq浏览
batch.answers is: ['QQ浏览器']
pre answer is : 一共有三个三十岁上下
batch.answers is: ['三个']
pre answer is : 日前,oppo官微正式宣
batch.answers is: ['6月10日']
pre answer is : 经搜索,侯明昊出演的
batch.answers is: ['20160109期']
pre answer is : 赵又廷官方给出的身
batch.answers is: ['最多也就177']
pre answer is : 人工智障爱酱,是a


encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 12, 10, 10, 11, 10,  7, 10, 10, 11, 11, 11, 10, 10, 10, 11],
       device='cuda:1')
pre answer is : 正常的子宫约是7##×##5
batch.answers is: ['约是7×5×3cm，容量约5ml', '7×5×3cm，容量约5ml', '长7-8cm，宽4-5cm，厚2-3cm，容量约5ml']
pre answer is : 您好，中公教育为您服务。
batch.answers is: ['大专及其以上的学历']
pre answer is : 蚕从由孵化开始要经过
batch.answers is: ['约十天', '十天']
pre ans

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 1, 1,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([ 9, 10, 10, 10, 11, 10,  8, 11, 12,  6, 12,  5, 11, 10, 10, 14],
       device='cuda:1')
pre answer is : 据林语堂先生《武则
batch.answers is: ['九十三人（不包括其受到株连的亲属)']
pre answer is : 早期梅毒4##～6周能治愈
batch.answers is: ['早期梅毒4～6周']
pre answer is : 2017年头伏是公历2017年7
batch.answers is: ['公历2017年7月12日', '公历2017-7-12']
pre answer is : 你先查看下医院的资质
batch.answers is: ['5000以上']
pre answer is : 45-59岁为中年,45岁以
batch.answers is: ['45-59岁']
pre answer is : 成年人有80，000-100，
batch.answers is: ['平均一天长0.03-0.04厘米']
pre answer is

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 9,  7,  9,  5, 11, 10, 11, 10,  8,  7,  9,  9, 10, 12, 11, 10],
       device='cuda:1')
pre answer is : 叙利亚内部，主要是
batch.answers is: ['三']
pre answer is : 每个星期二吧。
batch.answers is: ['星期二']
pre answer is : 我们姑且按照人品差
batch.answers is: ['15000左右', '15000']
pre answer is : 阿胶又名盆
batch.answers is: ['驴皮']
pre answer is : 有限责任公司，是指根据
batch.answers is: ['有限责任公司']
pre answer is : 没得比，《人民的名义
batch.answers is: ['破一']
pre answer is : 一个身份证可以实名认证
batch.answers is: ['6个']
pre answer is : 1949年10月1日，在解放
batch.answers is: ['1951年']
pre answer is : 最低额度3万起批
batch.answers is: ['3万起批']
pre answ

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10,  9,  7, 12, 10, 10, 10,  9, 10,  9,  9,  5, 10,  8,  9,  8],
       device='cuda:1')
pre answer is : 大家应该都知道,潘雨
batch.answers is: ['三', '三段婚姻']
pre answer is : 1美元=6.205##3人
batch.answers is: ['1美元=6.2053人民币']
pre answer is : 微信小程序是一
batch.answers is: ['不需要下载安装即可使用的应用']
pre answer is : 全国免费客服热线:(400##8
batch.answers is: ['4008-40

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 11, 10,  9,  9,  9, 11, 10,  5,  7,  9, 10, 11,  9,  5,  5],
       device='cuda:1')
pre answer is : 打胎不是小事还是要考
batch.answers is: ['博爱']
pre answer is : 20兆宽带下载速度是2.
batch.answers is: ['2.5MB/秒', '2400KB/S']
pre answer is : 减脂期间，低碳水的饮
batch.answers is: ['一个拳头大小']
pre answer is : 现在很贵了，基本都
batch.answers is: ['100+']
pre answer is : 哥以前到那里去玩,
batch.answers is: ['本科第二批', '2A', '本科第二批(2A)']
pre answer is : 应付账款周转率=年
batch.answers is: ['应付账款周转天数=360/应付账款周转率']
pre answer is : 烘干机作为一个新兴产业
batch.an

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 8, 11, 11, 10,  9,  9,  6, 11,  6, 11, 10, 10, 10, 10, 10,  6],
       device='cuda:1')
pre answer is : 大家都知道,手足
batch.answers is: ['6月龄']
pre answer is : 80年的价格分别是：一元
batch.answers is: ['100']
pre answer is : 招行有东航联名信用卡，
batch.answers is: ['招行']
pre answer is : 阿哲的男徒弟,阿钊。
batch.answers is: ['阿钊']
pre answer is : 圆的面积最大；正方
batch.answers is: ['圆的面积最大', '圆']
pre answer is : 中国共产党,简称中
batch.answers is: ['1921年7月']
pre answer is : 阳历是2月19
batch.answers is: ['双鱼座']
pre answer is : 开发微信公众平台需费用
batch.answers is: ['3000-万元']
pre answer is : 九寨沟位于四
batch.answers is: ['四川省阿坝藏族羌族自治州九寨沟

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 1, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  8, 10, 10,  9,  5, 12, 10,  6,  5,  7,  9,  7,  5,  9, 10],
       device='cuda:1')
pre answer is : 阅读
batch.answers is: ['最少需要2-3个月', '2-3个月']
pre answer is : cg##t绿城拥有南北
batch.answers is: ['行业的最前端']
pre answer is : 我2011年5月末做的双眼
batch.answers is: ['炫美整形', '米扬整形']
pre answer is : 基本信息栏英文名:po
batch.answers is: ['1000-7000元']
pre

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11, 11, 10, 10, 10,  6,  7, 10, 10, 10, 10, 16,  6,  7, 11, 10],
       device='cuda:1')
pre answer is : 你好！今年的审计费用标
batch.answers is: ['4000元']
pre answer is : 《劳动法》第21条规定,
batch.answers is: ['6个月']
pre answer is : 无缝管长度在3000~12000##mm
batch.answers is: ['6米']
pre answer is : 
batch.answers is: ['发动后30秒至一分钟后']
pre answer is : 导读:近日,《极限挑
batch.answers is: ['2017年7月2日', '2017年7月2日每周日晚21:00']
pre answer is : 神奇女侠（w
batch.answers is: ['神奇女侠']
pre answer is : 根据腾讯公司规
batch.answers is: ['50', '50个']

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 10, 11,  8,  9, 11,  4, 10, 11,  5, 10, 12,  9,  6,  9, 11],
       device='cuda:1')
pre answer is : 首艘国产航母正式下水
batch.answers is: ['2017年4月26日上午', '2017年4月26日']
pre answer is : 要想知道种植牙的价格
batch.answers is: ['几千到上万不等', '几千到上万']
pre answer is : 个人认为从第一部到第三
batch.answers is: ['黑旗', '第2部', '3代']
pre answer is : 最大可以搭配的处
batch.answers is: ['酷睿2四核 Q8300']
pre answer is : 平时多喝点绿豆汤，
batch.answers is: ['重庆【蓝天】医院']
pre answer is : 是的！杏花如雨，梨花似
batch.answers is: ['3月18日 14:00', '3月18日']
pre answer is : 1983年迁至
batch.answers is: ['中国医学科学院肿瘤医院肿瘤研究所']
pre answer is : 大约5000多，需要自己有
batch

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  5,  9, 10, 10,  8, 10, 10, 11,  8,  9, 11, 11,  8,  8, 11],
       device='cuda:1')
pre answer is : 蚂蚁花呗的账单日为每月
batch.answers is: ['确认收货后的下月1号—9号(或10号)']
pre answer is : 青少年该如
batch.answers is: ['太原龙城中医白癜风医院']
pre answer is : 毛利兰，虽然灰原哀
batch.answers is: ['毛利兰']
pre answer is : 首先玩家需要挑选一对
batch.answers is: ['怀孕3个小时之后']
pre answer

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([11,  6, 10,  8,  4,  7, 11, 10,  8,  8,  6,  4, 10,  9, 10, 10],
       device='cuda:1')
pre answer is : 性价比要看你所选钢琴的
batch.answers is: ['欧歌钢琴城']
pre answer is : 南瓜头稻草人
batch.answers is: ['一个']
pre answer is : 今年中考全省统考,备
batch.answers is: ['6月23日-25日']
pre answer is : 形式上的英语翻译
batch.answers is: ['formal; formally; in form']
pre answer is : 邓霓仪是
batch.answers is: ['邓肇坚爵士的孙女', '邓永锵的胞妹']
pre answer is : session的超时时间设
batch.answers is: ['30分钟']
pre answer is : 如果是窄口铅笔裙的话，
batch.answers is: ['高腰与迷你还有

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 5, 11, 10, 10, 11,  6,  6, 10,  6, 10, 10,  9, 10, 10,  6, 10],
       device='cuda:1')
pre answer is : 黑袍的身份
batch.answers is: ['周独夫的妹妹']
pre answer is : 一、团员年满28周岁，如
batch.answers is: ['28周岁']
pre answer is : 现代的suv就那么几款，
batch.answers is: ['名图']
pre answer is : 从1992年10月起,第一个
batch.answers is: ['4月1日']
pre answer is : 2017年6月英语四级考试时
batch.answers is: ['6月17日']
pre answer is : 椅子的名称始
batch.answers is: ['汉魏', '唐代']
pre answer is : 去杭州胤隆会
batch.answers is: ['杭州胤隆会', '胤隆会']
pre answer is : 相对来说厨师工资是不
batch.answers is: ['厨师']
pre answer is : [UNK]七坐八爬[UNK]
batch.answers is: ['

origin: 早餐#早上
pred: 你是否曾想
score: 0
----------------------------
origin: 约68立方#68立方#约为68立方米#68立方米
pred: 40尺平柜可装约54立方,
score: 0.30769230769230765
----------------------------
origin: 594×420mm
pred: 4开纸的尺寸是59
score: 0
----------------------------
origin: 成交额的万分之1.8
pred: 器人就是机械
score: 0
----------------------------
origin: Top7#7
pred: top##7.加坦
score: 0
----------------------------
origin: 壮元宁
pred: 首先确定肾阴虚吃什么
score: 0
----------------------------
origin: 4000块左右#4000块
pred: 疗主要以改善
score: 0
----------------------------
origin: 约110亿美元#110亿美元
pred: 洛克菲勒家族是美国最
score: 0.14285714285714288
----------------------------
origin: 第56集#56
pred: 第56集白浅恢复
score: 0.6
----------------------------
origin: 廊坊市人民医院
pred: 科室名称:妇产科所
score: 0
----------------------------
origin: 博爱
pred: 打胎不是小事还是要考
score: 0
----------------------------
origin: 公办二本院校#二本
pred: 成都师范学
score: 0
----------------------------
origin: 赵立春#苏荣
pred: 赵立春是汉
score: 0.7499999999999999
----------------------------
origin: 大概是几千到一万多#几千到一万

score: 0
----------------------------
origin: 炫美整形#米扬整形
pred: 我2011年5月末做的双眼
score: 0
----------------------------
origin: 1000-7000元
pred: 基本信息栏英文名:po
score: 0
----------------------------
origin: 60元#每包才60元钱
pred: 
score: 0
----------------------------
origin: 五星级
pred: 五星级酒店
score: 0.7499999999999999
----------------------------
origin: 一般在两年左右#一般在两年#两年左右
pred: 病情分析:你好,在任何年
score: 0.14285714285714288
----------------------------
origin: 年费是205
pred: 名或
score: 0
----------------------------
origin: 莫雨
pred: 唐三十六和陈
score: 0
----------------------------
origin: 成都西南男科医院
pred: 您好,成都
score: 0.3333333333333333
----------------------------
origin: 第十六章
pred: 推荐打第十六章
score: 0.7272727272727273
----------------------------
origin: 97周年
pred: 今年是[UNK]五四[UNK]运动
score: 0.16666666666666666
----------------------------
origin: 联想ideapad
pred: 玩游戏呢，首先
score: 0
----------------------------
origin: 长征七号素质拓展训练基地
pred: （1300）长征
score: 0.26666666666666666
----------------------------
origin: 192厘米
pred: 

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size(

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  5,  4,  9, 10, 11, 10,  6, 10,  8,  5,  9,  8, 10, 11, 10],
       device='cuda:1')
pre answer is : 建议你分组进行,这也是
batch.answers is: ['3~4组', '3~4']
pre answer is : 下载文件:
batch.answers is: ['哒哒网游加速器3.0.rar', '哒哒网游加速器']
pre answer is : 《我想》
batch.answers is: ['我想']
pre answer is : 赵亮是文职干部,没
batch.answers is: ['没有军衔']
pre answer is : 一般科目三考完后隔天
batch.answers is: ['隔天就可以', '隔天']
pre answer is : 理肤泉适合成年人使用。
batch.answers is: ['成年人']
pre answer is : 北京时间4月21日消息
batch.answers is: ['2017年6月23日']
pre

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 8, 8, 8],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 16, 12,  7, 10, 10, 10, 17,  9, 11, 16, 10, 10,  5, 11, 10],
       device='cuda:1')
pre answer is : 《人民的名义》是由最
batch.answers is: ['预计于2017年年初', '2017年年初']
pre answer is : 一、2.5平方电线能带多少瓦国标
batch.answers is: ['3500~5500W']
pre answer is : 而这次评测的主要载体映泰
batch.answers is: ['映泰Z270GT6']
pre answer is : 推荐一下qq浏览
batch.answers is: ['QQ浏览器']
pre answer is : 一共有三个三十岁上下
batch.answers is: ['三个']
pre answer is : 日前,oppo官微正式宣布
batch.answers is: ['6月10日']
pre answer is : 经搜索,侯明昊出演的
batch.answers is: ['20160109期']
pre answer is : ,黄海波177
batch.answers is: ['最多也就177']
pre answer is : 人工智

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11, 12, 10, 10, 11,  4, 11, 16, 10, 11, 11, 11, 11, 17,  9, 11],
       device='cuda:1')
pre answer is : 正常的子宫约是7##×##5##×
batch.answers is: ['约是7×5×3cm，容量约5ml', '7×5×3cm，容量约5ml', '长7-8cm，宽4-5cm，厚2-3cm，容量约5ml']
pre answer is : 您好，中公教育为您服务。
batch.answers is: ['大专及其以上的学历']
pre answer is : 蚕从由孵化开始要经过
batch.answers is: ['约十天', '十天']
pre 

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 1, 1,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([ 7, 10, 10, 11, 11, 10,  8, 11, 14,  5, 12,  5, 11, 10,  5, 14],
       device='cuda:1')
pre answer is : 据林语堂先生《
batch.answers is: ['九十三人（不包括其受到株连的亲属)']
pre answer is : 早期梅毒4##～6周能治愈
batch.answers is: ['早期梅毒4～6周']
pre answer is : 2017年头伏是公历2017年7
batch.answers is: ['公历2017年7月12日', '公历2017-7-12']
pre answer is : 你先查看下医院的资质吧
batch.answers is: ['5000以上']
pre answer is : 45-59岁为中年,45岁以
batch.answers is: ['45-59岁']
pre answer is : 成年人有80，000-100，
batch.answers is: ['平均一天长0.03-0.04厘米']
pre answer is 

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 7, 7, 7],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 7,  9,  9,  5, 11,  9, 11,  9, 16,  7,  9, 10, 12, 18, 11, 10],
       device='cuda:1')
pre answer is : 叙利亚内部，主
batch.answers is: ['三']
pre answer is : 每个星期二吧。这款
batch.answers is: ['星期二']
pre answer is : 我们姑且按照人品差
batch.answers is: ['15000左右', '15000']
pre answer is : 阿胶又名盆
batch.answers is: ['驴皮']
pre answer is : 有限责任公司，是指根据
batch.answers is: ['有限责任公司']
pre answer is : 没得比，《人民的名
batch.answers is: ['破一']
pre answer is : 一个身份证可以实名认证
batch.answers is: ['6个']
pre answer is : 1949年10月1日，在解
batch.answers is: ['1951年']
pre answer is : 最低额度3万起批。精英尊尚白金卡
batch.answers is: ['3万起批']
pr

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([10, 18,  7, 12, 16, 10, 11, 10, 10,  9,  9,  5, 10,  8,  8, 17],
       device='cuda:1')
pre answer is : 大家应该都知道,潘雨
batch.answers is: ['三', '三段婚姻']
pre answer is : 1美元=6.205##3人民币，美元(usd)兑
batch.answers is: ['1美元=6.2053人民币']
pre answer is : 微信小程序是一
batch.answers is: ['不需要下载安装即可使用的应用']
pre answer is : 全国免费客服热线:(400##8
batch.answers is: ['4008-400-301']
pre answer is : i=n##q##vs。i:一段导体中的电流
batch.answers is: ['I=nqvs

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 7, 7, 7],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 11, 10,  9,  9, 11, 11,  8,  5,  9,  9, 10, 11,  9,  5,  5],
       device='cuda:1')
pre answer is : 打胎不是小事还是要考
batch.answers is: ['博爱']
pre answer is : 度是2.
batch.answers is: ['2.5MB/秒', '2400KB/S']
pre answer is : 减脂期间，低碳水的饮
batch.answers is: ['一个拳头大小']
pre answer is : 都
batch.answers is: ['100+']
pre answer is : 哥以前到那里去玩,
batch.answers is: ['本科第二批', '2A', '本科第二批(2A)']
pre answer is : 应付账款周转率=年采购
batch.answers is: ['应付账款周转天数=360/应付账款周转率']
pre answer is : 烘干机作为一个新兴产业
batch.answers is: ['10

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 8, 11,  5, 10, 17,  9,  6, 11,  4,  9, 10, 10, 10, 10, 10,  6],
       device='cuda:1')
pre answer is : 大家都知道,手足
batch.answers is: ['6月龄']
pre answer is : 元
batch.answers is: ['100']
pre answer is : 招行有东航
batch.answers is: ['招行']
pre answer is : 阿哲的男徒弟,阿钊。
batch.answers is: ['阿钊']
pre answer is : 圆的面积最大；正方形次之；长方形最
batch.answers is: ['圆的面积最大', '圆']
pre answer is : 中国共产党,简称中
batch.answers is: ['1921年7月']
pre answer is : 阳历是2月19
batch.answers is: ['双鱼座']
pre answer is : 平台需费用
batch.answers is: ['3000-万元']
pre answer is : 九寨沟位
batch.answers is

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 1, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  8,  8, 10, 11,  5, 17, 10,  6,  5,  7,  9,  4,  5,  9, 10],
       device='cuda:1')
pre answer is : 阅读
batch.answers is: ['最少需要2-3个月', '2-3个月']
pre answer is : cg##t绿城拥有南北
batch.answers is: ['行业的最前端']
pre answer is : 我2011年5月末做的
batch.answers is: ['炫美整形', '米扬整形']
pre answer is : 文名:po
batch.answers is: ['1000-7000元']
pre answer is : )的
batch.answers is: ['60元', '每包才60元钱']
pre answer is : 五星级酒店
batch.answers is

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([11, 11, 10, 10, 16,  3,  7, 10, 10, 10, 12, 16,  6,  7, 13, 10],
       device='cuda:1')
pre answer is : 标
batch.answers is: ['4000元']
pre answer is : 《劳动法》第21条规定,
batch.answers is: ['6个月']
pre answer is : 无缝管长度在3000~12000##mm
batch.answers is: ['6米']
pre answer is : 
batch.answers is: ['发动后30秒至一分钟后']
pre answer is : 导读:近日,《极限挑战第三季》成
batch.answers is: ['2017年7月2日', '2017年7月2日每周日晚21:00']
pre answer is : 神奇女
batch.answers is: ['神奇女侠']
pre answer is : 根据腾讯公司规
ba

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 10,  6, 11,  9, 11,  4, 10, 16,  5, 10, 13, 10,  5,  9, 11],
       device='cuda:1')
pre answer is : 首艘国产航母正式下水
batch.answers is: ['2017年4月26日上午', '2017年4月26日']
pre answer is : 格
batch.answers is: ['几千到上万不等', '几千到上万']
pre answer is : 个人认为从第
batch.answers is: ['黑旗', '第2部', '3代']
pre answer is : 最大可以搭配的处理器型
batch.answers is: ['酷睿2四核 Q8300']
pre answer is : 平时多喝点绿豆汤，
batch.answers is: ['重庆【蓝天】医院']
pre answer is : 是的！杏花如雨，梨花似
batch.answers is: ['3月18日 14:00', '3月18日']
pre answer is : 1983年迁至
batch.answers is: ['中国医学科学院肿瘤医院肿瘤研究所']
pre answer is : 自己有
batch.answers is: ['大约5000

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  5,  7,  9, 10, 15, 11, 10, 11,  8,  9, 11, 16,  8,  8, 17],
       device='cuda:1')
pre answer is : 蚂蚁花呗的账单日为每月
batch.answers is: ['确认收货后的下月1号—9号(或10号)']
pre answer is : 青少年该如
batch.answers is: ['太原龙城中医白癜风医院']
pre answer is : 毛利兰，虽然灰
batch.answers is: ['毛利兰']
pre answer is : 首先玩家需要挑选一
batch.answers is: ['怀孕3个小时之后']
pre answer is

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([11,  6, 17,  8,  4,  7, 10, 11,  9,  8,  5,  4, 10,  9, 10, 11],
       device='cuda:1')
pre answer is : 性价比要看你所选钢琴的
batch.answers is: ['欧歌钢琴城']
pre answer is : 南瓜头稻草人
batch.answers is: ['一个']
pre answer is : 今年中考全省统考,备受关注的中考时
batch.answers is: ['6月23日-25日']
pre answer is : 形式上的英语翻译
batch.answers is: ['formal; formally; in form']
pre answer is : 邓霓仪是
batch.answers is: ['邓肇坚爵士的孙女', '邓永锵的胞妹']
pre answer is : session的超时时间设
batch.answers is: ['30分钟']
pre answer is : 如果是窄口铅笔裙的话
batch.answers is: ['高

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 5, 11, 10, 10, 11,  6,  4, 10,  6, 10, 11,  9, 10, 12,  5,  9],
       device='cuda:1')
pre answer is : 黑袍的身份
batch.answers is: ['周独夫的妹妹']
pre answer is : 一、团员年满28周岁，如
batch.answers is: ['28周岁']
pre answer is : 现代的suv就那么几款，
batch.answers is: ['名图']
pre answer is : 从1992年10月起,第一个
batch.answers is: ['4月1日']
pre answer is : 2017年6月英语四级考试时
batch.answers is: ['6月17日']
pre answer is : 椅子的名称始
batch.answers is: ['汉魏', '唐代']
pre answer is : 去杭州胤
batch.answers is: ['杭州胤隆会', '胤隆会']
pre answer is : 相对来说厨师工资是不
batch.answers is: ['厨师']
pre answer is : [UNK]七坐八爬[UNK]
batch.answers is: ['5～

----------------------------
origin: 今年中旬
pred: 日前,我们从相关渠道
score: 0
----------------------------
origin: 15000-200000IU/L
pred: 怀孕第7周,孕妇的血h##cg正常值为15000
score: 0
----------------------------
origin: VEGAS#爱剪辑
pred: 你可以先去学ve##gas,这
score: 0
----------------------------
origin: 三百多一罐
pred: 粉的价格在三百多一
score: 0.5714285714285714
----------------------------
origin: 5%
pred: 增值税
score: 0
----------------------------
origin: 无锡嘉仕恒信医院
pred: 无锡哪家医院做无
score: 0.25
----------------------------
origin: 几百元到几千元不等#几百元到几千元
pred: 病情分析：你好一般治疗前列腺炎，
score: 0
----------------------------
origin: 5月下旬
pred: 根据往年成绩发布时间来看,2017年公务
score: 0
----------------------------
origin: 锡婚
pred: 结婚十周年是锡婚。
score: 0.4
----------------------------
origin: 刘丰源
pred: 那么王子文为
score: 0
----------------------------
origin: 400-6177-626
pred: 
score: 0
----------------------------
origin: 400-0088-902
pred: 维修中心报修热线:
score: 0
----------------------------
origin: 在副职业导师的左边
pred: 暗精灵遗迹在
score: 0.13333333333333333
--------------------

----------------------------
origin: QQ影音
pred: qq影音就可以
score: 0.6666666666666666
----------------------------
origin: 5天
pred: 京
score: 0
----------------------------
origin: 年基准利率4.35%#4.35%
pred: 年基准利率4.35%
score: 1.0
----------------------------
origin: 6月1日
pred: 国际儿童节，简称儿童
score: 0
----------------------------
origin: 1到3个工作日
pred: 款平
score: 0
----------------------------
origin: 邓伦
pred: 谢童的扮演
score: 0
----------------------------
origin: 10天后
pred: 泡制10天后即可食用网
score: 0.4615384615384615
----------------------------
origin: 10010
pred: 介绍
score: 0
----------------------------
origin: 牛肉#三叉
pred: 是指牛肉，又称三叉，
score: 0.4
----------------------------
origin: 非系统盘
pred: 360是绿色软件，
score: 0
----------------------------
origin: 5月14日至15日
pred: 合作高
score: 0
----------------------------
origin: 1个月到6个月#1个月到6个月不等
pred: 不同，学
score: 0.16666666666666666
----------------------------
origin:  一个半小时以内
pred: 我前几天刚给宝宝化验的大便我家这
score: 0
----------------------------
origin: 一元人民币约等于5.0baht
pred: 泰铢(也称泰

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size(

input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100,

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  5,  4,  9, 10, 11, 10, 12, 10, 17,  5,  9,  6, 10, 11, 10],
       device='cuda:1')
pre answer is : 建议你分组进行,这也是
batch.answers is: ['3~4组', '3~4']
pre answer is : 下载文件:
batch.answers is: ['哒哒网游加速器3.0.rar', '哒哒网游加速器']
pre answer is : 《我想》
batch.answers is: ['我想']
pre answer is : 赵亮是文职干部,没
batch.answers is: ['没有军衔']
pre answer is : 一般科目三考完后隔天
batch.answers is: ['隔天就可以', '隔天']
pre answer is : 理肤泉适合成年人使用。
batch.answers is: ['成年人']
pre answer is : 北京时间4月21日消息
batch.answers is: ['2017年6月23日']
pre

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  8,  8,  8],
        [ 0,  0,  0,  ..., 10, 10, 10]], device='cuda:1')
e_idx: tensor([10, 16, 17,  7,  5, 10, 10, 17,  9, 17, 16, 10, 10,  5, 11, 17],
       device='cuda:1')
pre answer is : 《人民的名义》是由最
batch.answers is: ['预计于2017年年初', '2017年年初']
pre answer is : 能带多少瓦国标
batch.answers is: ['3500~5500W']
pre answer is : 而这次评测的主要载体映泰z2##70##gt##6电
batch.answers is: ['映泰Z270GT6']
pre answer is : 推荐一下qq浏览
batch.answers is: ['QQ浏览器']
pre answer is : 一共有三个
batch.answers is: ['三个']
pre answer is : 日前,oppo官微正式宣布
batch.answers is: ['6月10日']
pre answer is : 经搜索,侯明昊出演的
batch.answers is: ['20160109期']
pre answer is : ,黄海波177
batch.answers

x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 11, 11, 11],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([11, 17, 10, 11, 15,  4, 17, 16, 11, 11, 18, 11, 18, 17,  9, 11],
       device='cuda:1')
pre answer is : 正常的子宫约是7##×##5##×
batch.answers is: ['约是7×5×3cm，容量约5ml', '7×5×3cm，容量约5ml', '长7-8cm，宽4-5cm，厚2-3cm，容量约5ml']
pre answer is : 。考公务员需
batch.answers is: ['大专及其以上的学历']
pre answer is : 蚕从由孵化开始要经过
batch.answers is: ['约十天', '十天']
pre answer is : （5种
ba

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 1, 1,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([ 7, 10, 10, 11, 11, 10,  8, 12, 17,  5, 12,  5, 11, 10,  5, 14],
       device='cuda:1')
pre answer is : 据林语堂先生《
batch.answers is: ['九十三人（不包括其受到株连的亲属)']
pre answer is : 早期梅毒4##～6周能治愈
batch.answers is: ['早期梅毒4～6周']
pre answer is : 2017年头伏是公历2017年7
batch.answers is: ['公历2017年7月12日', '公历2017-7-12']
pre answer is : 你先查看下医院的资质吧
batch.answers is: ['5000以上']
pre answer is : 45-59岁为中年,45岁以
batch.answers is: ['45-59岁']
pre answer is : -100，
batch.answers is: ['平均一天长0.03-0.04厘米']
pre answer is : mac系统默认截

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ..., 11, 11, 11],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 7,  9,  9,  5, 11, 10, 11,  9, 16,  7,  9, 11, 17, 18, 11, 10],
       device='cuda:1')
pre answer is : 叙利亚内部，主
batch.answers is: ['三']
pre answer is : 每个星期二吧。这款
batch.answers is: ['星期二']
pre answer is : 我们姑且按照人品差
batch.answers is: ['15000左右', '15000']
pre answer is : 阿胶又名盆
batch.answers is: ['驴皮']
pre answer is : 有限责任公司，是指根据
batch.answers is: ['有限责任公司']
pre answer is : 没得比，《人民的名义
batch.answers is: ['破一']
pre answer is : 一个身份证可以实名认证
batch.answers is: ['6个']
pre answer is : 1949年10月1日，在解
batch.answers is: ['1951年']
pre answer is : 。精英尊尚白金卡


encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 2,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([10, 18,  7, 12, 16, 10, 11, 11, 10,  9,  9,  4, 10,  8,  8, 17],
       device='cuda:1')
pre answer is : 大家应该都知道,潘雨
batch.answers is: ['三', '三段婚姻']
pre answer is : 1美元=6.205##3人民币，美元(usd)兑
batch.answers is: ['1美元=6.2053人民币']
pre answer is : 微信小程序是一
batch.answers is: ['不需要下载安装即可使用的应用']
pre answer is : 全国免费客服热线:(400##8
batch.answers is

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  9,  9,  9],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([10, 17, 10,  9,  9, 17, 16,  8,  5,  9,  5, 10, 17, 12, 10,  5],
       device='cuda:1')
pre answer is : 打胎不是小事还是要考
batch.answers is: ['博爱']
pre answer is : .5m##b/秒，360
batch.answers is: ['2.5MB/秒', '2400KB/S']
pre answer is : 饮
batch.answers is: ['一个拳头大小']
pre answer is : 都
batch.answers is: ['100+']
pre answer is : 哥以前到那里去玩,
batch.answers is: ['本科第二批', '2A', '本科第二批(2A)']
pre answer is : 应付账款周转率=年采购成本/应付账
batch.answers is: ['应付账款周转天数=360/应付账款周转率']
pre answe

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 8, 11,  5,  4, 17, 10,  6, 13,  4,  9, 10, 18, 10, 17, 10,  4],
       device='cuda:1')
pre answer is : 大家都知道,手足
batch.answers is: ['6月龄']
pre answer is : 元
batch.answers is: ['100']
pre answer is : 招行有东航
batch.answers is: ['招行']
pre answer is : 阿哲的男
batch.answers is: ['阿钊']
pre answer is : 圆的面积最大；正方形次之；长方形最
batch.answers is: ['圆的面积最大', '圆']
pre answer is : 中国共产党,简称中共
batch.answers is: ['1921年7月']
pre answer is : 阳历是2月19
batch.answers is: ['双鱼座']
pre answer is : 平台需费用3000-
batch.answers is: ['3000-万元']
pre answer is : 九寨沟位
batch.answers is

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11, 11,  8, 10, 17,  5, 17, 17,  5,  5,  4,  9,  4,  5,  9, 10],
       device='cuda:1')
pre answer is : 阅读
batch.answers is: ['最少需要2-3个月', '2-3个月']
pre answer is : cg##t绿城拥有南北两大生
batch.answers is: ['行业的最前端']
pre answer is : 我2011年5月末做的
batch.answers is: ['炫美整形', '米扬整形']
pre answer is : :po
batch.answers is: ['1000-7000元']
pre answer

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([12, 16, 10, 17, 16,  3,  7, 10, 17, 17, 16, 16,  6,  8, 13, 10],
       device='cuda:1')
pre answer is : 标准
batch.answers is: ['4000元']
pre answer is : 《劳动法》第21条规定,劳动合同可
batch.answers is: ['6个月']
pre answer is : 无缝管长度在3000~12000##mm
batch.answers is: ['6米']
pre answer is : ，在发动后30秒
batch.answers is: ['发动后30秒至一分钟后']
pre answer is : 导读:近日,《极限挑战第三季》成
batch.answers is: ['2017年7月2日', '2017年7月2日每周日晚21:00']
pre answer is : 神奇女
batch.answers is: ['神奇女侠']
pre answer i

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([10, 12,  6, 11,  9, 11,  4, 10, 16,  5, 18, 13, 18,  5,  9, 11],
       device='cuda:1')
pre answer is : 首艘国产航母正式下水
batch.answers is: ['2017年4月26日上午', '2017年4月26日']
pre answer is : 格我们
batch.answers is: ['几千到上万不等', '几千到上万']
pre answer is : 个人认为从第
batch.answers is: ['黑旗', '第2部', '3代']
pre answer is : 最大可以搭配的处理器型
batch.answers is: ['酷睿2四核 Q8300']
pre answer is : 平时多喝点绿豆汤，
batch.answers is: ['重庆【蓝天】医院']
pre answer is : 似
batch.answers is: ['3月18日 14:00', '3月18日']
pre answer is : 1983年迁至
batch.answers is: ['中国医学科学院肿瘤医院肿瘤研究所']
pre answer is : 自己有
batch.answers is: ['大约5000多', '大约5

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([18,  5,  7,  9, 16, 15, 17, 11, 11,  8,  9, 15, 16,  8,  8, 17],
       device='cuda:1')
pre answer is : 蚂蚁花呗的账单日为每月1号,还款日为
batch.answers is: ['确认收货后的下月1号—9号(或10号)']
pre answer is : 青少年该如
batch.answers is: ['太原龙城中医白癜风医院']
pre answer is : 毛利兰，虽然灰
batch.answers is: ['毛利兰']
pre answer is : 首先玩家需要挑选一
batch.answers is: ['怀孕3个小时之后']
pre an

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 12, 12, 12],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 11, 11, 11]], device='cuda:1')
e_idx: tensor([ 4,  6, 17,  8,  4, 10,  8, 11,  9,  8,  5,  4, 10,  9, 11, 11],
       device='cuda:1')
pre answer is : 性价比要
batch.answers is: ['欧歌钢琴城']
pre answer is : 南瓜头稻草人
batch.answers is: ['一个']
pre answer is : 注的中考时
batch.answers is: ['6月23日-25日']
pre answer is : 形式上的英语翻译
batch.answers is: ['formal; formally; in form']
pre answer is : 邓霓仪是
batch.answers is: ['邓肇坚爵士的孙女', '邓永锵的胞妹']
pre answer is : session的超时时间设置set##tings
batch.answers is: ['30分钟']
pre answer is : 如果是窄

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 8, 8, 8],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 5, 11, 10, 16, 12,  6,  4, 10, 10, 10, 11, 12, 10, 11,  5,  9],
       device='cuda:1')
pre answer is : 黑袍的身份
batch.answers is: ['周独夫的妹妹']
pre answer is : 岁，如
batch.answers is: ['28周岁']
pre answer is : 现代的suv就那么几款，
batch.answers is: ['名图']
pre answer is : 一个国家级新区落
batch.answers is: ['4月1日']
pre answer is : 2017年6月英语四级考试时间
batch.answers is: ['6月17日']
pre answer is : 椅子的名称始
batch.answers is: ['汉魏', '唐代']
pre answer is : 去杭州胤
batch.answers is: ['杭州胤隆会', '胤隆会']
pre answer is : 相对来说厨师工资是不
batch.answers is: ['厨师']
pre answer is : [UNK]七坐八爬[UNK]象征孩子
batch.answers is: ['5～8个月', '5～8

----------------------------
origin: 三本
pred: 四川大学锦江学
score: 0
----------------------------
origin: 新的记分周期
pred: 当事人的驾驶证分数不是在每年年底清
score: 0.08695652173913045
----------------------------
origin: 12.1.2
pred: 目前itunes最新版本的型
score: 0
----------------------------
origin: 信越的X-23-7783-D#7783
pred: 日本信越的
score: 0.6666666666666665
----------------------------
origin: 卡尔的萌宠庄园
pred: 卡尔的萌
score: 0.7272727272727273
----------------------------
origin: 三到五根
pred: 、6根，300##ml水，
score: 0.25
----------------------------
origin: 大概250元#250元
pred: 调主要是空气通
score: 0
----------------------------
origin: 应该是7,8,9月份#7,8,9月份
pred: 关山草原位于陕西省
score: 0
----------------------------
origin: 2.64尺#2尺6寸4
pred: 呵呵，可惜我来晚
score: 0
----------------------------
origin: 850-1100元/立方#850-1100元/立方左右
pred: 材质的，
score: 0
----------------------------
origin: 2017年5月30日#5月30日
pred: 2017年的端午节是2017年5
score: 0.37499999999999994
----------------------------
origin: 50一包
pred: 除
score: 0
----------------------------
origin: 等级1
pr

score: 0
----------------------------
origin: 小葵
pred: 这是5岁的小
score: 0.25
----------------------------
origin: 14天
pred: 周期是多长才
score: 0
----------------------------
origin: 金煌芒
pred: 台农芒：黄
score: 0.28571428571428575
----------------------------
origin: 麦克疯
pred: 麦克疯试试
score: 0.7499999999999999
----------------------------
origin: 生肖兔宝珠
pred: 看到是你,我就来
score: 0
----------------------------
origin: 许龄月
pred: 择天记莫雨扮演者许龄月
score: 0.42857142857142855
----------------------------
origin: 90/60-139/89
pred: 
score: 0
----------------------------
origin: 亚历珊德拉·达达里奥
pred: 正在中国内地
score: 0
----------------------------
origin: 月经干净后3-7日
pred: 专家指出，宫颈糜烂治疗
score: 0
----------------------------
origin: 下午3点至5点
pred: 下午3点至5点是最佳
score: 0.8235294117647058
----------------------------
origin: 美利达公爵600
pred: 买美利达公爵600吧
score: 0.8571428571428571
----------------------------
origin: ⑴400--100--8087 〔2〕400-9967-698
pred: 
score: 0
----------------------------
origin: 89mm*127mm
pred: ##mm*127##mm.
score: 0
--

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size(

input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100,

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  5,  4,  9, 10, 10, 11, 12, 10, 17,  5,  9,  6, 10, 11, 10],
       device='cuda:1')
pre answer is : 建议你分组进行,这也是
batch.answers is: ['3~4组', '3~4']
pre answer is : 下载文件:
batch.answers is: ['哒哒网游加速器3.0.rar', '哒哒网游加速器']
pre answer is : 《我想》
batch.answers is: ['我想']
pre answer is : 赵亮是文职干部,没
batch.answers is: ['没有军衔']
pre answer is : 一般科目三考完后隔天
batch.answers is: ['隔天就可以', '隔天']
pre answer is : 理肤泉适合成年人使用
batch.answers is: ['成年人']
pre answer is : 北京时间4月21日消息,
batch.answers is: ['2017年6月23日']
pre

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ..., 10, 10, 10]], device='cuda:1')
e_idx: tensor([10, 16, 17,  4,  5, 16, 10, 17,  9, 17, 16, 17, 12,  5, 11, 17],
       device='cuda:1')
pre answer is : 《人民的名义》是由最
batch.answers is: ['预计于2017年年初', '2017年年初']
pre answer is : 能带多少瓦国标
batch.answers is: ['3500~5500W']
pre answer is : 而这次评测的主要载体映泰z2##70##gt##6电
batch.answers is: ['映泰Z270GT6']
pre answer is : 推荐一下
batch.answers is: ['QQ浏览器']
pre answer is : 一共有三个
batch.answers is: ['三个']
pre answer is : 日前,oppo官微正式宣布,将于6月10
batch.answers is: ['6月10日']
pre answer is : 经搜索,侯明昊出演的
batch.answers is: ['20160109期']
pre answer is : ,黄海波177
batch.answ

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 11, 11, 11],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([11, 17, 10, 11, 15,  4, 17, 16, 11, 11, 18, 11, 18, 17, 10, 11],
       device='cuda:1')
pre answer is : 正常的子宫约是7##×##5##×
batch.answers is: ['约是7×5×3cm，容量约5ml', '7×5×3cm，容量约5ml', '长7-8cm，宽4-5cm，厚2-3cm，容量约5ml']
pre answer is : 。考公务员需
batch.answers is: ['大专及其以上的学历']
pre answer is : 蚕从由孵化开始要经过
batch.answers is: ['约十天', '十天']
pre answer is : 5种
batch.answers is: ['23到28之间', '23到28']

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 1, 1,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([ 7, 10, 10, 11, 19, 17,  8, 15, 17,  5, 12,  5, 17, 10,  5, 14],
       device='cuda:1')
pre answer is : 据林语堂先生《
batch.answers is: ['九十三人（不包括其受到株连的亲属)']
pre answer is : 早期梅毒4##～6周能治愈
batch.answers is: ['早期梅毒4～6周']
pre answer is : 2017年头伏是公历2017年7
batch.answers is: ['公历2017年7月12日', '公历2017-7-12']
pre answer is : 你先查看下医院的资质吧
batch.answers is: ['5000以上']
pre answer is : 下为青年。60-74
batch.answers is: ['45-59岁']
pre answer is : -100，000根头发，不同
batch.answers is: ['平均一天长0.03-0.04厘米']
pre answer is : mac

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ..., 11, 11, 11],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 7,  9,  9,  5, 11, 10, 11, 16, 16,  7,  9, 11, 17, 18, 11, 10],
       device='cuda:1')
pre answer is : 叙利亚内部，主
batch.answers is: ['三']
pre answer is : 每个星期二吧。这款
batch.answers is: ['星期二']
pre answer is : 我们姑且按照人品差
batch.answers is: ['15000左右', '15000']
pre answer is : 阿胶又名盆
batch.answers is: ['驴皮']
pre answer is : 有限责任公司，是指根据
batch.answers is: ['有限责任公司']
pre answer is : 没得比，《人民的名义
batch.answers is: ['破一']
pre answer is : 一个身份证可以实名认证
batch.answers is: ['6个']
pre answer is : 1949年10月1日，在解放军向全国进军
batch.answers is: ['1951年']
pre answer is : 。精

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 1, 2,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([10, 18,  7, 12, 16, 10, 11, 11, 10,  9,  9,  4, 11,  8,  8, 17],
       device='cuda:1')
pre answer is : 大家应该都知道,潘雨
batch.answers is: ['三', '三段婚姻']
pre answer is : 1美元=6.205##3人民币，美元(usd)兑
batch.answers is: ['1美元=6.2053人民币']
pre answer is : 微信小程序是一
batch.answers is: ['不需要下载安装即可使用的应用']
pre answer is : (400##8
batch.answers is: ['4008-400-301']
pre answer is : i=n##q##vs。i:一段导体中的电流
batch.answers is: ['I=nqvs']
pre an

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  9,  9,  9],
        ...,
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([10, 17, 10,  9,  9, 15, 16, 12,  5, 15,  5, 10, 17, 12, 10,  5],
       device='cuda:1')
pre answer is : 打胎不是小事还是要考
batch.answers is: ['博爱']
pre answer is : .5m##b/秒，360
batch.answers is: ['2.5MB/秒', '2400KB/S']
pre answer is : 饮
batch.answers is: ['一个拳头大小']
pre answer is : 都
batch.answers is: ['100+']
pre answer is : 哥以前到那里去玩,
batch.answers is: ['本科第二批', '2A', '本科第二批(2A)']
pre answer is : 应付账款周转率=年采购成本/应
batch.answers is: ['应付账款周转天数=360/应付账款周转率']
pre answer 

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([10, 11,  5,  4, 17, 10,  6, 13,  4,  9, 16, 18, 10, 17, 10,  4],
       device='cuda:1')
pre answer is : 大家都知道,手足口病
batch.answers is: ['6月龄']
pre answer is : 元
batch.answers is: ['100']
pre answer is : 招行有东航
batch.answers is: ['招行']
pre answer is : 阿哲的男
batch.answers is: ['阿钊']
pre answer is : 圆的面积最大；正方形次之；长方形最
batch.answers is: ['圆的面积最大', '圆']
pre answer is : 中国共产党,简称中共
batch.answers is: ['1921年7月']
pre answer is : 阳历是2月19
batch.answers is: ['双鱼座']
pre answer is : 平台需费用3000-
batch.answers is: ['3000-万元']
pre answer is : 九寨沟位
batch.answers 

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11, 11, 10, 10, 17,  5, 17, 17,  4,  5,  4,  9,  4,  5,  9, 10],
       device='cuda:1')
pre answer is : 阅读
batch.answers is: ['最少需要2-3个月', '2-3个月']
pre answer is : cg##t绿城拥有南北两大生
batch.answers is: ['行业的最前端']
pre answer is : 我2011年5月末做的双眼
batch.answers is: ['炫美整形', '米扬整形']
pre answer is : :po
batch.answers is: ['1000-7000元']
pre answer is : )的焦油含量只有
batch.answers is: ['60元', '每包才60元钱']
pre answer is : 五星级酒店
batch.a

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([17, 16, 10, 17, 16,  3,  7, 10, 17, 17, 16, 16,  6,  7, 13, 11],
       device='cuda:1')
pre answer is : 标准有所改变，
batch.answers is: ['4000元']
pre answer is : 《劳动法》第21条规定,劳动合同可
batch.answers is: ['6个月']
pre answer is : 无缝管长度在3000~12000##mm
batch.answers is: ['6米']
pre answer is : ，在发动后30秒
batch.answers is: ['发动后30秒至一分钟后']
pre answer is : 导读:近日,《极限挑战第三季》成
batch.answers is: ['2017年7月2日', '2017年7月2日每周日晚21:00']
pre answer is : 神奇女
batch.answers is: ['神奇女侠']
pre ans

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([16, 12,  6, 11,  8, 11,  4, 10, 16,  5, 18, 13, 18,  5,  9, 11],
       device='cuda:1')
pre answer is : 水我国拥有了第
batch.answers is: ['2017年4月26日上午', '2017年4月26日']
pre answer is : 我们
batch.answers is: ['几千到上万不等', '几千到上万']
pre answer is : 个人认为从第
batch.answers is: ['黑旗', '第2部', '3代']
pre answer is : 最大可以搭配的处理器型
batch.answers is: ['酷睿2四核 Q8300']
pre answer is : 平时多喝点绿豆汤
batch.answers is: ['重庆【蓝天】医院']
pre answer is : 似
batch.answers is: ['3月18日 14:00', '3月18日']
pre answer is : 1983年迁至
batch.answers is: ['中国医学科学院肿瘤医院肿瘤研究所']
pre answer is : 自己有
bat

x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([18,  5,  7,  9, 16, 15, 17, 16, 11,  8,  9, 15, 16,  8,  8, 17],
       device='cuda:1')
pre answer is : 蚂蚁花呗的账单日为每月1号,还款日为
batch.answers is: ['确认收货后的下月1号—9号(或10号)']
pre answer is : 青少年该如
batch.answers is: ['太原龙城中医白癜风医院']
pre answer is : 毛利兰，虽然灰
batch.answers is: ['毛利兰']
pre answer is : 首先玩家需要挑选一
batch.answers is: ['怀孕3个小时之后']
pre answer is : 3周，可以
batch.answers is: ['2-3周']
pre a

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 12, 12, 12],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 11, 11, 11]], device='cuda:1')
e_idx: tensor([ 4,  6, 17,  8,  4, 10,  8, 11,  9,  8,  5,  4, 10,  9,  9, 11],
       device='cuda:1')
pre answer is : 性价比要
batch.answers is: ['欧歌钢琴城']
pre answer is : 南瓜头稻草人
batch.answers is: ['一个']
pre answer is : 注的中考时
batch.answers is: ['6月23日-25日']
pre answer is : 形式上的英语翻译
batch.answers is: ['formal; formally; in form']
pre answer is : 邓霓仪是
batch.answers is: ['邓肇坚爵士的孙女', '邓永锵的胞妹']
pre answer is : session的超时时间设置set##tings
batch.answers is: ['30分钟']
pre answer is : 如果是窄

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 8, 8, 8],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 5, 11, 10, 16, 12,  4,  4, 10,  5,  9, 11, 12, 17, 11,  5,  9],
       device='cuda:1')
pre answer is : 黑袍的身份
batch.answers is: ['周独夫的妹妹']
pre answer is : 岁，如
batch.answers is: ['28周岁']
pre answer is : 现代的suv就那么几款，
batch.answers is: ['名图']
pre answer is : 一个国家级新区落
batch.answers is: ['4月1日']
pre answer is : 试时间
batch.answers is: ['6月17日']
pre answer is : 椅子的名
batch.answers is: ['汉魏', '唐代']
pre answer is : 去杭州胤
batch.answers is: ['杭州胤隆会', '胤隆会']
pre answer is : 相对来说厨师工资是不
batch.answers is: ['厨师']
pre answer is : [UNK]七坐八爬
batch.answers is: ['5～8个月', '5～8个']
pre answer is : 在家兼

origin: 预计为5月中旬#5月中旬
pred: 年山东公务
score: 0
----------------------------
origin: 谢新达
pred: 猪哥亮（
score: 0
----------------------------
origin: 30000.15平方米
pred: 米
score: 0.4
----------------------------
origin: 友谊之光
pred: 歌曲：友
score: 0.28571428571428575
----------------------------
origin: 一级
pred: 社区卫生服务站为基层
score: 0
----------------------------
origin: 剑宗
pred: 目前版本4
score: 0
----------------------------
origin: 1922年5月
pred: 年团（简称
score: 0.25
----------------------------
origin: 1.22亿
pred: 张末，张艺谋的女儿
score: 0
----------------------------
origin: 20年
pred: 一只青蛙活20年，
score: 0.4444444444444445
----------------------------
origin: 游 戏王：卡片力量6
pred: 自然是psp
score: 0
----------------------------
origin: T510
pred: t##51##0最先
score: 0
----------------------------
origin: 江苏无锡
pred: 05##10是江苏
score: 0.5
----------------------------
origin: 100元
pred: 门票：
score: 0
----------------------------
origin: 三板斧
pred: 程咬金三板斧程咬金
score: 0.5
----------------------------
origin: 几千
pred: 是也是要看
score: 0
-----

----------------------------
origin: 6周以上
pred: 早期孕妇的妊娠反应十分厉害，六成以
score: 0.1
----------------------------
origin: 0.3～0.5kg/(kg.d)
pred: 
score: 0
----------------------------
origin: 9月18日
pred: 9月18日九
score: 0.888888888888889
----------------------------
origin: 克炎王
pred: 脂溢性皮炎洗头注
score: 0.18181818181818182
----------------------------
origin: 0.27—0.4mm
pred: 生长速度头发是毛发中生长
score: 0
----------------------------
origin: 按汽车报废年限规定
pred: 按照国家的规
score: 0.13333333333333333
----------------------------
origin: 早上九点半
pred: 东方盐湖城门票价格
score: 0
----------------------------
origin: 两块钱#15~18元左右#15~18元
pred: 
score: 0
----------------------------
origin: 1100#1100元
pred: 这款融入多种镀层的du
score: 0
----------------------------
origin: 闪迪酷豆USB3.0#闪迪CZ43酷豆系列#闪迪CZ43酷豆USB3.0#闪迪
pred: 闪迪作为存储领头羊品牌，在技
score: 0.2666666666666667
----------------------------
origin: 2017年2月份
pred: 宝马汽车正式发布了2017款新一代宝马5
score: 0.0909090909090909
----------------------------
origin: 900元#900元左右
pred: 者托付
score: 0
-------------------

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size(

input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100,

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([11,  5,  4,  9, 10, 12, 11, 12, 10, 17,  5,  9,  6, 10, 11, 10],
       device='cuda:1')
pre answer is : 建议你分组进行,这也是
batch.answers is: ['3~4组', '3~4']
pre answer is : 下载文件:
batch.answers is: ['哒哒网游加速器3.0.rar', '哒哒网游加速器']
pre answer is : 《我想》
batch.answers is: ['我想']
pre answer is : 赵亮是文职干部,没
batch.answers is: ['没有军衔']
pre answer is : 一般科目三考完后隔天
batch.answers is: ['隔天就可以', '隔天']
pre answer is : 理肤泉适合成年人使用。理
batch.answers is: ['成年人']
pre answer is : 北京时间4月21日消息,
batch.answers is: ['2017年6月23日']
p

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ..., 10, 10, 10]], device='cuda:1')
e_idx: tensor([12, 16, 17,  3,  5, 16,  9, 17,  9, 19, 16, 17, 12,  5, 11, 17],
       device='cuda:1')
pre answer is : 《人民的名义》是由最高人
batch.answers is: ['预计于2017年年初', '2017年年初']
pre answer is : 能带多少瓦国标
batch.answers is: ['3500~5500W']
pre answer is : 而这次评测的主要载体映泰z2##70##gt##6电
batch.answers is: ['映泰Z270GT6']
pre answer is : 推荐一
batch.answers is: ['QQ浏览器']
pre answer is : 一共有三个
batch.answers is: ['三个']
pre answer is : 日前,oppo官微正式宣布,将于6月10
batch.answers is: ['6月10日']
pre answer is : 经搜索,侯明昊出演
batch.answers is: ['20160109期']
pre answer is : ,黄海波177
batch.answ

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 11, 11, 11],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([11, 17,  7, 11, 15,  4, 17, 16, 15, 11, 18, 11, 18, 17, 10, 16],
       device='cuda:1')
pre answer is : 正常的子宫约是7##×##5##×
batch.answers is: ['约是7×5×3cm，容量约5ml', '7×5×3cm，容量约5ml', '长7-8cm，宽4-5cm，厚2-3cm，容量约5ml']
pre answer is : 。考公务员需
batch.answers is: ['大专及其以上的学历']
pre answer is : 蚕从由孵化开始
batch.answers is: ['约十天', '十天']
pre answer is : 5种
batch.answers is: ['23到28之间', '23到28']
pr

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 1, 1,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([ 7, 10, 10, 11, 19, 17,  8, 15, 17,  5, 12,  5, 17, 10,  5, 14],
       device='cuda:1')
pre answer is : 据林语堂先生《
batch.answers is: ['九十三人（不包括其受到株连的亲属)']
pre answer is : 早期梅毒4##～6周能治愈
batch.answers is: ['早期梅毒4～6周']
pre answer is : 2017年头伏是公历2017年7
batch.answers is: ['公历2017年7月12日', '公历2017-7-12']
pre answer is : 你先查看下医院的资质吧
batch.answers is: ['5000以上']
pre answer is : 下为青年。60-74
batch.answers is: ['45-59岁']
pre answer is : -100，000根头发，不同
batch.answers is: ['平均一天长0.03-0.04厘米']
pre answer is : mac

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ..., 14, 14, 14],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 7,  9,  9,  5, 17, 10, 11, 16, 16,  7,  9, 14, 17, 18, 11, 10],
       device='cuda:1')
pre answer is : 叙利亚内部，主
batch.answers is: ['三']
pre answer is : 每个星期二吧。这款
batch.answers is: ['星期二']
pre answer is : 我们姑且按照人品差
batch.answers is: ['15000左右', '15000']
pre answer is : 阿胶又名盆
batch.answers is: ['驴皮']
pre answer is : 有限责任公司，是指根据《中华人民共
batch.answers is: ['有限责任公司']
pre answer is : 没得比，《人民的名义
batch.answers is: ['破一']
pre answer is : 一个身份证可以实名认证
batch.answers is: ['6个']
pre answer is : 1949年10月1日，在解放军向全国进军
batch.answers is: ['1951年']
pre answer i

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 2,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([10, 18,  7, 13, 16, 10, 17, 18, 10,  9,  9,  4, 11,  8, 11, 17],
       device='cuda:1')
pre answer is : 大家应该都知道,潘雨
batch.answers is: ['三', '三段婚姻']
pre answer is : 1美元=6.205##3人民币，美元(usd)兑
batch.answers is: ['1美元=6.2053人民币']
pre answer is : 微信小程序是一
batch.answers is: ['不需要下载安装即可使用的应用']
pre answer is : (400##8-
batch.answers is: ['4008-400-301']
pre answer is : i=n##q##vs。i:一段导体中的电流
batch.answers is: ['I=nqvs']
pre answer is : 关羽的真正生日是在农
batch.answers is: ['农历六月二十四日']
pre answer is : 央广网北京9月10日消息

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  2,  ..., 14, 14, 14],
        [ 0,  0,  0,  ...,  9,  9,  9],
        ...,
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([10, 17, 10, 11,  9, 15, 16, 12,  5, 15,  5, 10, 17, 12, 10,  5],
       device='cuda:1')
pre answer is : 打胎不是小事还是要考
batch.answers is: ['博爱']
pre answer is : 秒，360
batch.answers is: ['2.5MB/秒', '2400KB/S']
pre answer is : 饮
batch.answers is: ['一个拳头大小']
pre answer is : 是100
batch.answers is: ['100+']
pre answer is : 哥以前到那里去玩,
batch.answers is: ['本科第二批', '2A', '本科第二批(2A)']
pre answer is : 应付账款周转率=年采购成本/应
batch.answers is: ['应付账款周转天数=360/应付账款周转率']
pre answer is : ，它的价格
batch.answers is: ['100万以上']
pre answer is : 局关于
batch.answers is: ['11%']
p

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  1,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 9, 11,  5,  4, 17, 10,  6, 13,  4,  9, 16, 18, 10, 17,  5,  4],
       device='cuda:1')
pre answer is : 大家都知道,手足口
batch.answers is: ['6月龄']
pre answer is : 元
batch.answers is: ['100']
pre answer is : 招行有东航
batch.answers is: ['招行']
pre answer is : 阿哲的男
batch.answers is: ['阿钊']
pre answer is : 圆的面积最大；正方形次之；长方形最
batch.answers is: ['圆的面积最大', '圆']
pre answer is : 中国共产党,简称中共
batch.ans

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([12, 11, 10, 10, 17,  5, 17, 17,  4, 10,  4,  9,  4,  5, 11, 10],
       device='cuda:1')
pre answer is : 阅读，
batch.answers is: ['最少需要2-3个月', '2-3个月']
pre answer is : cg##t绿城拥有南北两大生
batch.answers is: ['行业的最前端']
pre answer is : 我2011年5月末做的双眼
batch.answers is: ['炫美整形', '米扬整形']
pre answer is : :po
batch.answers is: ['1000-7000元']
pre answer is : )的焦油含量只有
batch.answers is: ['60元', '每包才60元钱']
pre answer is : 五星级酒店
batch.answers is: ['五星级']
pre answer is : 何年龄都可以移
batch.answers is: ['一般在两年左右', '一般在两年'

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([17, 16, 10, 17, 16,  3,  8, 10, 17, 17, 16, 16,  6,  8, 19, 11],
       device='cuda:1')
pre answer is : 标准有所改变，
batch.answers is: ['4000元']
pre answer is : 《劳动法》第21条规定,劳动合同可
batch.answers is: ['6个月']
pre answer is : 无缝管长度在3000~12000##mm
batch.answers is: ['6米']
pre answer is : ，在发动后30秒
batch.answers is: ['发动后30秒至一分钟后']
pre answer is : 导读:近日,《极限挑战第三季》成
batch.answers is: ['2017年7月2日', '2017年7月2日每周日晚21:00']
pre answer is : 神奇女
batch.answers is: ['神奇女侠']
pre answer is : 根据腾讯公司规定
batch.answers is: ['50', '50个']
pre answer is : ，
batch.answers i

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([16, 12,  5, 12,  8, 17,  4, 10, 16,  5, 18, 13, 18,  6,  9,  8],
       device='cuda:1')
pre answer is : 水我国拥有了第
batch.answers is: ['2017年4月26日上午', '2017年4月26日']
pre answer is : 我们
batch.answers is: ['几千到上万不等', '几千到上万']
pre answer is : 个人认为从
batch.answers is: ['黑旗', '第2部', '3代']
pre answer is : 最大可以搭配的处理器型号
batch.answers is: ['酷睿2四核 Q8300']
pre answer is : 平时多喝点绿豆汤
batch.answers 

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([18,  5,  7,  9, 16, 15, 17, 18, 16,  8,  9, 15, 16,  8,  8, 17],
       device='cuda:1')
pre answer is : 蚂蚁花呗的账单日为每月1号,还款日为
batch.answers is: ['确认收货后的下月1号—9号(或10号)']
pre answer is : 青少年该如
batch.answers is: ['太原龙城中医白癜风医院']
pre answer is : 毛利兰，虽然灰
batch.answers is: ['毛利兰']
pre answer is : 首先玩家需要挑选一
batch.answers is: ['怀孕3个小时之后']
pre answer is : 3周，可以
batch.answers is: ['2-3周']
pre answer is : 取1000成
batch.answers is: ['1200', '1000']
pre answer is : 心>手机教程
batch.answers is: ['5分钟']
pre answer is 

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 12, 12, 12],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 11, 11, 11]], device='cuda:1')
e_idx: tensor([ 4,  6, 17,  6,  4, 10,  8, 11,  9,  8,  5,  4, 10,  9,  9, 11],
       device='cuda:1')
pre answer is : 性价比要
batch.answers is: ['欧歌钢琴城']
pre answer is : 南瓜头稻草人
batch.answers is: ['一个']
pre answer is : 注的中考时
batch.answers is: ['6月23日-25日']
pre answer is : 形式上的英语
batch.answers is: ['formal; formally; in form']
pre answer is : 邓霓仪是
batch.answers is: ['邓肇坚爵士的孙女', '邓永锵的胞妹']
pre answer is : session的超时时间设置set##tings
batch.answers is: ['30分钟']
pre answer is : 如果是窄口铅笔裙
batch.answers is: ['高腰与迷你还有A字结合在一起']
pre answer is : 
batch.answers is: ['170+17

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 8, 8, 8],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 5, 17, 10, 16, 12,  4,  4, 10,  5,  9, 17, 12, 17, 11,  5,  9],
       device='cuda:1')
pre answer is : 黑袍的身份
batch.answers is: ['周独夫的妹妹']
pre answer is : 岁，如果没有担任团
batch.answers is: ['28周岁']
pre answer is : 现代的suv就那么几款，
batch.answers is: ['名图']
pre answer is : 一个国家级新区落
batch.answers is: ['4月1日']
pre answer is : 试时间
batch.answers is

origin: 肚子
pred: 差不多怀孕，感觉肚子胀
score: 0.33333333333333337
----------------------------
origin: 人性的弱点
pred: 《人性的弱点》继《
score: 0.9090909090909091
----------------------------
origin: 华康利爽#左氧氟沙星
pred: 首先确定是否血尿是否被经血
score: 0
----------------------------
origin: 74流明以上#500流明以上
pred: 键部件主要是cr
score: 0
----------------------------
origin: 2010年6月8日凌晨1点
pred: 2010年6月8日凌晨1
score: 0.9473684210526316
----------------------------
origin: 本卡帐号的后6位
pred: ，密
score: 0
----------------------------
origin: 不超过1升
pred: 可随身携带总量不
score: 0.15384615384615385
----------------------------
origin: 1美元兑6.2284元人民币
pred: 年国民经济和社
score: 0.125
----------------------------
origin: 400--802--0212#400-802-0212
pred: 部颁发的等级证
score: 0
----------------------------
origin: 药妆
pred: 数码产品才不用买,便宜不了多少
score: 0
----------------------------
origin: 网络情人节
pred: 网络情人节,这是虚拟
score: 0.7142857142857143
----------------------------
origin: 1:1000-10000#一般用1:5000到1:10000#1:5000到1:10000
pred: 给出了一个范围
score: 0.15384615384615383
-------------

----------------------------
origin: 毛利兰
pred: 毛利兰，虽然灰
score: 0.6666666666666666
----------------------------
origin: 怀孕3个小时之后
pred: 首先玩家需要挑选一
score: 0
----------------------------
origin: 2-3周
pred: 3周，可以
score: 0.3333333333333333
----------------------------
origin: 1200#1000
pred: 取1000成
score: 0.5
----------------------------
origin: 5分钟
pred: 心>手机教程
score: 0
----------------------------
origin: 666.67平方米
pred: 亩=15市亩1公
score: 0
----------------------------
origin: 80-100#90分左右#90分
pred: 北外托福老师给你解答：这是2种考
score: 0
----------------------------
origin: 100片
pred: 看怎么吃的了如果
score: 0
----------------------------
origin: 枭羹
pred: 46、端午节前后的2
score: 0
----------------------------
origin: 大约为4小时#4小时
pred: 不着急充电,
score: 0
----------------------------
origin: 67569元
pred: 查询近年的数
score: 0
----------------------------
origin: 2063G
pred: 百度云免费网盘容
score: 0
----------------------------
origin: 英雄集结#群雄割据
pred: 还是英雄集结比较
score: 0.6666666666666666
----------------------------
origin: 2734.5万辆
pred: 20

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size(

input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100,

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([19,  4,  4,  9, 10, 12, 16, 12, 10, 17,  4, 16,  6, 10, 11, 16],
       device='cuda:1')
pre answer is : 建议你分组进行,这也是科学的锻炼方式,
batch.answers is: ['3~4组', '3~4']
pre answer is : 下载文件
batch.answers is: ['哒哒网游加速器3.0.rar', '哒哒网游加速器']
pre answer is : 《我想》
batch.answers is: ['我想']
pre answer is : 赵亮是文职干部,没
batch.answers is: ['没有军衔']
pre answer is : 一般科目三考完后隔天
batch.answers is: ['隔天就可以', '隔天']
pre answer is : 理肤泉适合成年人使用。理
batch.answers is: ['成年人']
pre answer is : 北京时间4月21日消息,据国外媒体
batch.answers is: ['20

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ..., 10, 10, 10]], device='cuda:1')
e_idx: tensor([12, 16, 16,  3,  5, 16,  9, 17,  9, 19, 16, 10, 12,  5, 11, 17],
       device='cuda:1')
pre answer is : 《人民的名义》是由最高人
batch.answers is: ['预计于2017年年初', '2017年年初']
pre answer is : 能带多少瓦国标
batch.answers is: ['3500~5500W']
pre answer is : 而这次评测的主要载体映泰z2##70##gt##6
batch.answers is: ['映泰Z270GT6']
pre answer is : 推荐一
batch.answers is: ['QQ浏览器']
pre answer is : 一共有三个
batch.answers is: ['三个']
pre answer is : 布,将于6月10
batch.answers is: ['6月10日']
pre answer is : 经搜索,侯明昊出演
batch.answers is: ['20160109期']
pre answer is : ,黄海波177
batch.answers is: ['最多也

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([11, 17,  7, 11, 15,  4, 17, 16, 15, 11, 18, 11, 18, 17, 10, 16],
       device='cuda:1')
pre answer is : 正常的子宫约是7##×##5##×
batch.answers is: ['约是7×5×3cm，容量约5ml', '7×5×3cm，容量约5ml', '长7-8cm，宽4-5cm，厚2-3cm，容量约5ml']
pre answer is : 务。考公务员需
batch.answers is: ['大专及其以上的学历']
pre answer is : 蚕从由孵化开始
batch.answers is: ['约十天', '十天']
pre answer is : 5种
batch.answers is: ['23到28之间', '23到28']
p

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 1, 2,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([ 7, 10, 17, 11, 19, 17,  8, 15, 17,  5, 15,  5, 17, 11,  5, 14],
       device='cuda:1')
pre answer is : 据林语堂先生《
batch.answers is: ['九十三人（不包括其受到株连的亲属)']
pre answer is : 早期梅毒4##～6周能治愈
batch.answers is: ['早期梅毒4～6周']
pre answer is : 2017年头伏是公历2017年7月12日至2017年7
batch.answers is: ['公历2017年7月12日', '公历2017-7-12']
pre answer is : 你先查看下医院的资质吧
batch.answers is: ['5000以上']
pre answer is : 下为青年。60-74
batch.answers is: ['45-59岁']
pre answer is : -100，000根头发，不同
batch.answers is: ['平均一天长0.03-0.04厘米']
pre answ

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ..., 14, 14, 14],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 7,  7,  9,  5, 17, 10, 13, 16, 16,  7,  9, 14, 17, 18, 11, 10],
       device='cuda:1')
pre answer is : 叙利亚内部，主
batch.answers is: ['三']
pre answer is : 每个星期二吧。
batch.answers is: ['星期二']
pre answer is : 我们姑且按照人品差
batch.answers is: ['15000左右', '15000']
pre answer is : 阿胶又名盆
batch.answers is: ['驴皮']
pre answer is : 有限责任公司，是指根据《中华人民共
batch.answers is: ['有限责任公司']
pre answer is : 没得比，《人民的名义
batch.answers is: ['破一']
pre answer is : 一个身份证可以实名认证6个
batch.answers is: ['6个']
pre answer is : 解放军向全国进军
batch.answers is: ['1951年']
pre answer is : 尚白金卡
bat

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 8, 8, 8]], device='cuda:1')
e_idx: tensor([10, 18,  7, 13, 16, 10, 17, 18, 10,  9,  9,  4, 12,  8, 11, 17],
       device='cuda:1')
pre answer is : 大家应该都知道,潘雨
batch.answers is: ['三', '三段婚姻']
pre answer is : 1美元=6.205##3人民币，美元(usd)兑
batch.answers is: ['1美元=6.2053人民币']
pre answer is : 微信小程序是一
batch.answers is: ['不需要下载安装即可使用的应用']
pre answer is : (400##8-
batch.answers is: ['4008-400-301']
pre answer is : 体中的电流
batch.answers is: ['I=nqvs']
pre answer is : 关羽的真正

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  2,  ..., 14, 14, 14],
        [ 0,  0,  0,  ...,  9,  9,  9],
        ...,
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([10, 17, 10, 11,  9, 15, 16, 12,  5, 15,  5, 17, 17, 16, 10,  5],
       device='cuda:1')
pre answer is : 打胎不是小事还是要考
batch.answers is: ['博爱']
pre answer is : 秒，360
batch.answers is: ['2.5MB/秒', '2400KB/S']
pre answer is : 饮
batch.answers is: ['一个拳头大小']
pre answer is : 是100
batch.answers is: ['100+']
pre answer is : 哥以前到那里去玩,
batch.answers is: ['本科第二批', '2A', '本科第二批(2A)']
pre answer is : 成本/应
batch.answers is: ['应付账款周转天数=360/应付账款周转率']
pre answer is : ，它的价格
batc

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  1,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 9, 11,  5,  4, 17, 15,  6, 13,  3,  9, 16, 18,  7, 17,  8,  4],
       device='cuda:1')
pre answer is : 大家都知道,手足口
batch.answers is: ['6月龄']
pre answer is : 元
batch.answers is: ['100']
pre answer is : 招行有东航
batch.answers is: ['招行']
pre answer is : 阿哲的男
batch.answers is: ['阿钊']
pre answer is : 圆的面积最大；正方形次之；长方形最
batch.answers is: ['圆的面积最大', '圆']
pre answer is : 共,成立于1921
batch.answers is: ['1921年7月']
pre answer is : 阳历是2月19
batch.answers is: ['双鱼座']
pre answer is : 3000-
batch.answers is: ['3000-万元']
pre answer is : 九寨沟
batch.answers is: ['四川

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 2,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([12, 11,  4, 10, 17,  5, 17, 17,  4, 10,  4,  9,  4,  5, 11, 10],
       device='cuda:1')
pre answer is : 阅读，
batch.answers is: ['最少需要2-3个月', '2-3个月']
pre answer is : cg##t绿城拥有南北两大生
batch.answers is: ['行业的最前端']
pre answer is : 我2011年5
batch.answers is: ['炫美整形', '米扬整形']
pre answer is : :po
batch.answers is: ['1000-7000元']
pre answer is : )的焦油含量只有
batch.answers is: ['60元', '每包才60元钱']
pre answer is : 五星级酒店
batch.answer

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([17, 16, 16, 17, 97,  3,  8, 10, 17, 17, 16, 16,  6,  8, 19, 16],
       device='cuda:1')
pre answer is : 标准有所改变，
batch.answers is: ['4000元']
pre answer is : 《劳动法》第21条规定,劳动合同可
batch.answers is: ['6个月']
pre answer is : 无缝管长度在3000~12000##mm，其中包括有
batch.answers is: ['6米']
pre answer is : ，在发动后30秒
batch.answers is: ['发动后30秒至一分钟后']
pre answer is : 导读:近日,《极限挑战第三季》成员现身上海,[UNK]男人帮[UNK]还是那些[UNK]男人帮[UNK],成员还是[UNK]孙红雷、黄渤、黄磊、罗志祥、王迅、张艺兴[UNK],据悉,《极限挑战第三季》将于2017年7月2日每周日晚21:0

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([16, 12,  5, 12,  8, 15,  4, 10, 16,  5, 18, 13, 18,  6,  9,  8],
       device='cuda:1')
pre answer is : 水我国拥有了第
batch.answers is: ['2017年4月26日上午', '2017年4月26日']
pre answer is : 我们
batch.answers is: ['几千到上万不等', '几千到上万']
pre answer is : 个人认为从
batch.answers is: ['黑旗', '第2部', '3代']
pre answer is : 最大可以搭配的处理器型号
batch.answers is: ['酷睿2四核 Q8300']
pre answer is : 平时多喝点绿豆汤
batch.answers is: ['重庆【蓝天】医院']
pre answer is : 似云，好一
batch.answers is: ['3月18日 14:00', '3月18日']
pre answer is : 1983年迁至
batch.answers is: ['中国医学科学院肿瘤医院肿瘤研究所']
pre answer is : 自己有

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  8,  8,  8],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 13, 13, 13]], device='cuda:1')
e_idx: tensor([18,  5,  5,  9, 16, 15, 17, 18, 16,  7,  9, 15, 16,  8,  6, 17],
       device='cuda:1')
pre answer is : 蚂蚁花呗的账单日为每月1号,还款日为
batch.answers is: ['确认收货后的下月1号—9号(或10号)']
pre answer is : 青少年该如
batch.answers is: ['太原龙城中医白癜风医院']
pre answer is : 毛利兰，虽
batch.answers is: ['毛利兰']
pre answer is : 首先玩家需要挑选一
batch.answers is: ['怀孕3个小时之后']
pre answer is : 3周，可以
batch.answers is: ['2-3周']
pre an

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 12, 12, 12],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 11, 11, 11]], device='cuda:1')
e_idx: tensor([ 4,  6, 17,  6,  4, 13,  8, 11,  9, 14,  6,  4, 17,  9,  9, 18],
       device='cuda:1')
pre answer is : 性价比要
batch.answers is: ['欧歌钢琴城']
pre answer is : 南瓜头稻草人
batch.answers is: ['一个']
pre answer is : 注的中考时
batch.answers is: ['6月23日-25日']
pre answer is : 形式上的英语
batch.answers is: ['formal; formally; in form']
pre answer is : 邓霓仪是
batch.answers is: ['邓肇坚爵士的孙女', '邓永锵的胞妹']
pre answer is : session的超时时间设置set##tings中session_
batch.answers is: ['30分钟']
pre answer is

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  8,  8,  8],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 5, 17, 10, 16, 12,  5,  4, 10,  5,  9, 15, 12, 17, 12,  5,  9],
       device='cuda:1')
pre answer is : 黑袍的身份
batch.answers is: ['周独夫的妹妹']
pre answer is : 岁，如果没有担任团
batch.answers is: ['28周岁']
pre answer is : 现代的suv就那么几款，
batch.answers is: ['名图']
pre answer is : 一个国家级新区落
batch.answers is: ['4月1日']
pre answer is : 试时间
batch.answers is: ['6月17日']
pre answer is : 椅子的名称
batch.answers is: ['汉魏', '唐代']
pre answer is : 去杭州胤
batch.answers is: ['杭州胤隆会', '胤隆会']
pre answer is : 相对来说厨师工资是不
batch.answers is: ['厨师']
pre answer is : [UNK]七坐八爬
batch.answer

score: 0
----------------------------
origin: 在副职业导师的左边
pred: 暗精灵遗迹在
score: 0.13333333333333333
----------------------------
origin: 4001-577-598#400-1577-598
pred: 调【官
score: 0
----------------------------
origin: 均价1224元/吨#均价1224元
pred: [UNK]整体上看,今年上半年玻璃现货市
score: 0
----------------------------
origin: 一条狗的使命#为了与你相遇
pred: 一条狗的
score: 0.8
----------------------------
origin: 放风草# 防风草
pred: 这里推荐大家种放风草。防风草一株成
score: 0.3157894736842105
----------------------------
origin: 方特东方神话主题乐园
pred: 上个月新开业的方特
score: 0.2105263157894737
----------------------------
origin: 1810mAH
pred: 苹果新一代iphone6拥有2个版本,一个是4.
score: 0
----------------------------
origin: 1-3万左右#1-3万
pred: 蕃茄田加盟费多少?加盟
score: 0
----------------------------
origin: 122
pred: 公民在遇有交通事故
score: 0
----------------------------
origin: 四个小时
pred: 1.食物吃进肚中
score: 0
----------------------------
origin: CD时间40分钟
pred: 阴阳师草莓音乐节鬼王cd有多久?草莓
score: 0.08695652173913045
----------------------------
origin: 怀孕8-10天左右#怀孕8-10天
pred: 数据的研
score: 0
------

pred: 名或空间，然后再找
score: 0
----------------------------
origin: 莫雨
pred: 唐三十六
score: 0
----------------------------
origin: 成都西南男科医院
pred: 您好,成都西南男科医
score: 0.823529411764706
----------------------------
origin: 第十六章
pred: 推荐打第
score: 0.25
----------------------------
origin: 97周年
pred: 今年是[UNK]五四[UNK]运动
score: 0.16666666666666666
----------------------------
origin: 联想ideapad
pred: 玩游戏呢
score: 0
----------------------------
origin: 长征七号素质拓展训练基地
pred: （1300）长征
score: 0.26666666666666666
----------------------------
origin: 192厘米
pred: 中国拳王张君龙身高192厘
score: 0.28571428571428575
----------------------------
origin: 守柴炉烤鸭
pred: 大家如果资金充足,且
score: 0
----------------------------
origin: 一般在0.95g/cm^3#0.95g/cm^3
pred: 准
score: 0
----------------------------
origin: 艾里扎尔锤须大叔
pred: 粗线这种情况应该是你
score: 0
----------------------------
origin: 9月20日至21日
pred: 2015年一建考试时间http:/
score: 0
----------------------------
origin: 1年
pred: 苹果
score: 0
----------------------------
origin: 加利福尼亚州#加州#California
pred

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size(

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 27, 27, 27],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  2,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  7,  7,  7]], device='cuda:1')
e_idx: tensor([12, 16, 29,  4,  4,  8, 16,  8,  4, 14,  9, 12, 17,  5, 16,  8],
       device='cuda:1')
pre answer is : 绝地求生大逃杀将在今日(
batch.answers is: ['迅游国际网游加速器']
pre answer is : 乘车路线:乘820、98##6、97##4、通州
batch.answers is: ['亦庄工业园站下车即到', '亦庄工业园站', '北京大兴区亦庄经济开发区经海三路']
pre answer is : 国内
batch.answers is: ['今年年中']
pre answer is : 司棋[1
batch.answers is: ['贾迎春的丫头']
pre answer is : 全国都在
batch.answers is: ['移动WLAN的附加产品']
pre answer is : 小皙护肤品补水效
batch.answers is: ['238元']
pre an

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  2,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 11, 11, 11]], device='cuda:1')
e_idx: tensor([  4,  17,   9,  18,  11,   9,   8,  17,  17, 139,  17,  18,  11,   9,
         14,  17], device='cuda:1')
pre answer is : 一条狗的
batch.answers is: ['一条狗的使命', '为了与你相遇']
pre answer is : 这里推荐大家种放风草。防风草一株成
batch.answers is: ['放风草', ' 防风草']
pre answer is : 上个月新开业的方特
batch.answers is: ['方特东方神话主题乐园']
pre answer is : 苹果新一代iphone6拥有2个版本,一个是4.
batch.answers is: ['1810mAH']
pre answer is : 加盟
batch.answers is: ['1-3万左右', '1-3万']
pre answer is : 公民在遇有交通事故
batch.answers is: ['122']
pre answer is : 1.食物吃进肚中
batch.answers is: ['四个小时']
pre answer is : 阴阳师草莓音

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 1, 2,  ..., 8, 8, 8],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([16, 10,  9, 14,  9,  7,  8, 10,  5,  7, 17, 10, 16,  4, 11, 17],
       device='cuda:1')
pre answer is : 西红柿的生长发育周期从种子播种、
batch.answers is: ['15---30天']
pre answer is : cctv##9记录频道伟大的卫
batch.answers is: ['伟大的卫国战争']
pre answer is : 务
batch.answers is: ['95543']
pre answer is : 的蒂芬
batch.answers is: ['大概500铢一个人', '500铢一个人']
pre answer is : 中国香港一直沿用欧
batch.answers is: ['43码']
pre answer is : .沪通铁路预计
batch.answers is: ['预计2017年', '2017年']
pre answer is : 我用腾讯手机管家
batch.answers is: ['腾讯手机管家']
pre answer is : 到
batch.answers is: ['3000到9000不等', '3000到9000']
pre answer is : 抛开其他，
ba

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 9, 9, 9]], device='cuda:1')
e_idx: tensor([ 10,  11,   8,   9, 139, 111,   4,   5,  10,   9,   9,  12,  16,  10,
         13,  10], device='cuda:1')
pre answer is : 《欢乐颂2》是由东阳
batch.answers is: ['2017年5月11日']
pre answer is : 单家银行最高可以贷到24
batch.answers is: ['24万', '16万左右', '16万', '房产市值的八成']
pre answer is : 2、不错，特别是
batch.answers is: ['不错']
pre answer is : 阿里众包是阿里巴巴
batch.answers is: ['阿里众包']
pre answer is : 汉艺画开天文化传播有限公司发行的一部3d动画,前传为《疯味英雄》。2017年3月31日发布预告片,5月3日开始更新。[1]中文名幻镜诺德琳原版名称疯味英雄其他名称疯味英雄第二季类型热血科幻冒险地区中国大陆播放期间2017年5月3日开始更新发行武汉艺画开天文化传播有限公司
batch.answers is: ['2017年5月3日']
pre answer is : 层级。4500工资属于第1阶层收税。工资税收起征点是3500块

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ..., 15, 15, 15],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  6,  6,  6],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([26, 13,  9, 17,  4,  5,  9,  8, 16, 16, 18, 17, 10, 29, 17, 13],
       device='cuda:1')
pre answer is : 人流手术，也就是六七百
batch.answers is: ['800-1000左右', '800-1000']
pre answer is : 据了解，c919，是中国继运-10
batch.answers is: ['190座']
pre answer is : 有注册
batch.answers is: ['500元左右', '500元']
pre answer is : 1小时：血糖6
batch.answers is: ['血糖≤7.8毫摩/升', '≤7.8毫摩/升']
pre answer is : 中式油烟
batch.answers is: ['中式']
pre answer is : 一个打开小
batch.answers is: ['阴道口']
pre answer is : 应该计入

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 9,  9, 16,  8, 17,  8, 18, 18,  6,  4,  5,  8,  9,  8,  8,  5],
       device='cuda:1')
pre answer is : 如果你要用苹果6补
batch.answers is: ['4000左右', '4000']
pre answer is : nba老鲍尔是今年nba选
batch.answers is: ['拉瓦尔·鲍尔']
pre answer is : 恐怖之源萨科390##0点券★★★★\x##0##a小
batch.answers is: ['9900点券']
pre answer is : 您可以看一下当地
batch.answers is: ['兰州中研白癜风研究院']
pre answer is : 的市民,可以提
batch.answers is: ['3个月']
pre answer is : 欢乐颂2更新到第
batch.answers is: ['52', '52集']
pre answer is : 60秒角度是量度角的单
batch.answers is: ['60分']
pre answer is : 发布时间上，相信作为2017款车型来说，其
batch.answers is: ['2016年年内']
pre answer 

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 4,  8,  8,  4, 13,  8, 10,  7,  4, 13, 18, 10, 17,  5,  9,  9],
       device='cuda:1')
pre answer is : 家居装修
batch.answers is: ['木地板']
pre answer is : 高压锅炖排骨一般
batch.answers is: ['一般20分钟左右', '一般20分钟', '20分钟', '20分钟左右']
pre answer is : 荣耀5##x畅玩版与
batch.answers is: ['大概会在11月1日正式开售', '11月1日正式开售', '11月1日']
pre answer is : 欢乐颂电
batch.answers is: ['曲筱绡']
pre answer is : 通过实名认证的用户发送的红
batch.answers is: ['200元', '200']
pre answer is : 冰点脱毛是目前最
batch.answers is: ['冰点半导体激光脱毛仪器']
pre answer is : 
batch.a

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  2,  ...,  3,  3,  3],
        [ 0,  0,  0,  ..., 10, 10, 10],
        ...,
        [ 0,  0,  0,  ...,  8,  8,  8],
        [ 0,  0,  0,  ...,  8,  8,  8],
        [ 0,  0,  0,  ..., 11, 11, 11]], device='cuda:1')
e_idx: tensor([10,  9, 16, 17,  9,  6, 18, 10,  6, 16,  5, 16, 10, 17, 10, 51],
       device='cuda:1')
pre answer is : 您好,成都西南男科医
batch.answers is: ['成都西南男科医院']
pre answer is : 你怎么保养一
batch.answers is: ['一次450左右']
pre answer is : 你的咨询,正
batch.answers is: ['90-140/60-90mmhg之间', '90-140/60-90mmhg']
pre answer is : 早在情人
batch.answers is: ['预计会是710点券', '710点券', '788点券']
pre answer is : 小米手机便签存放在
batch.answers is: ['MIUI/notes文件夹']
pre answer is : 白起，我国战
batch.answers is: ['白起']
pre answer is : 美赞臣三阶段是180毫升四勺奶粉,勺子比
batch.answers is: ['180毫升四勺奶粉']
pre answer i

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  1,  2,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ..., 12, 12, 12],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  8,  8,  8]], device='cuda:1')
e_idx: tensor([10, 16,  5,  3, 12,  4,  5, 18, 13, 16, 98, 17,  8, 16,  7, 16],
       device='cuda:1')
pre answer is : 国际儿童节，简称儿童
batch.answers is: ['6月1日']
pre answer is : 款平台的审核标
batch.answers is: ['1到3个工作日']
pre answer is : 谢童的扮演
batch.answers is: ['邓伦']
pre answer is : 泡制10
batch.answers is: ['10天后']
pre answer is : 介绍:中国联合网
batch.answers is: ['10010']
pre answer is : 是指牛肉
batch.answers is: ['牛肉', '三叉']
pre answer is : 360是绿色软
batch.answers is: ['非系统盘']
pre answer is :

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  8,  8,  8],
        [ 0,  0,  0,  ..., 10, 10, 10],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  7,  7,  7],
        [ 0,  1,  2,  ..., 12, 12, 12]], device='cuda:1')
e_idx: tensor([18, 16, 12, 18,  5, 11,  6,  5, 31, 12,  8,  9,  8,  8, 13, 16],
       device='cuda:1')
pre answer is : 低于600度，如果超过600度，就属于高度近
batch.answers is: ['低于600度']
pre answer is : 1200抽到此皮肤,还
batch.answers is: ['1000-2000元以内']
pre answer is : 般是
batch.answers is: ['1匹', '1匹空调']
pre answer is : 已公
batch.answers is: ['1月13日-16日']
pre answer is : 腰痛是以腰
batch.answers is: ['骨科']
pre answer i

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 8, 8, 8],
        ...,
        [0, 0, 0,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([18, 11, 11, 14, 16, 15, 14,  7,  8, 16, 16, 13, 12, 16,  7,  4],
       device='cuda:1')
pre answer is : 专家指出，宫颈糜烂治疗时间应选在月经
batch.answers is: ['月经干净后3-7日']
pre answer is : 下午3点至5点是最佳运
batch.answers is: ['下午3点至5点']
pre answer is : ，价格
batch.answers is: ['美利达公爵600']
pre answer is : 网
batch.answers is: ['⑴400--100--8087 〔2〕400-9967-698']
pre answer is : ##mm*127##mm.
batch.answers is: ['89mm*127mm']
pre answer is : 发票的综合税点
batch.answers is: ['4.7%左右', '4.7%']
pre answer is : 属于高端笔记本处理器，性能强
batch.answers is: ['4代']
pre answer is : 跖疣，它是由于
batch.answers is: ['祛疣ling药膏治']
pre answe

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  1,  2,  ..., 10, 10, 10],
        [ 0,  0,  2,  ..., 11, 11, 11],
        ...,
        [ 0,  0,  0,  ..., 11, 11, 11],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 9, 16, 29, 15, 10,  4, 17,  6,  5,  8, 10,  4,  6, 18,  7,  6],
       device='cuda:1')
pre answer is : t##p-link路由器的初
batch.answers is: ['admin']
pre answer is : 固定温度的，
batch.answers is: ['没有固定温度']
pre answer is : 皮带的时间要求都不一致，普遍在6万至
batch.answers is: ['6万至10万公里']
pre answer is : 上市的问题总结
batch.answers is: ['2017年4月21日']
pre answer is : 集数44集《宫锁连城》
batch.answers is: ['44集', '63集']
pre answer is : 剑三，天
batch.answers is: ['剑三']
pre answer is : 编经咨询中国建筑工业出版社客服，得
batch.answers is: ['5月初']
pre answer is : 喜马拉雅猫，
batch.answers is: ['喜马拉雅猫', '暹罗猫'

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  7,  7,  7],
        ...,
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 9,  9, 14,  4,  5,  9,  8, 15,  9, 10, 17, 16,  9, 14,  5, 18],
       device='cuda:1')
pre answer is : 市场上的防腐木用材
batch.answers is: ['1800-2000左右', '1800-2000']
pre answer is : 
batch.answers is: ['2022年09月10日-2022年09月25日']
pre answer is : 度大于6000米的只
batch.answers is: ['11034米', '11034']
pre answer is : 小马真名
batch.answers is: ['马彦毅']
pre answer is : 小猫咪出生
batch.answers is: ['出生一个月后']
pre answer is : googlemap只适合在美国有
batch.answers is: ['Sygic']
pre answer is : 5号线地铁起自余
batch.answers is: ['2019年', '计划2019年']
pre answer is : ）且
batch.answers 

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  2,  ..., 13, 13, 13],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([15, 16, 16,  4,  6, 98,  8, 16,  5,  8,  3,  4, 18, 12, 22, 29],
       device='cuda:1')
pre answer is : 据悉,《寒武纪》由周雨彤、侯明
batch.answers is: ['5月8日24点', '每周二至周五']
pre answer is : 油钱6000(算一年1##w公里,每公里大
batch.answers is: ['约等于2W', '2W']
pre answer is : 小黄鸭很不错啊～喜欢你点映137##8万
batch.answers is: ['2.7亿']
pre answer is : 女警手长
batch.answers is: ['女警']
pre answer is : 空中客车a3##80
batch.answers is: ['555名', '555']
pre answer is : 生产日期pd即product##iondate有效期gp即g##ua##rant##e

origin: 2015年10月26日—10月31日
pred: 间为11月1日，考生
score: 0.11111111111111112
----------------------------
origin: 7500公里-10000公里左右#7500公里-10000公里
pred: 空调风道蒸发箱和空调滤
score: 0
----------------------------
origin: 中本聪
pred: 比特币的
score: 0
----------------------------
origin: 45～59岁
pred: 联合国曾在一份文件中把14岁至25
score: 0.11764705882352941
----------------------------
origin: 紫背菜
pred: 这个是紫背菜。形
score: 0.6
----------------------------
origin: 4000001999
pred: 您好，很高兴为你解答。滴滴打车
score: 0
----------------------------
origin: 环城河附近
pred: 第一个半天可以先去鲁
score: 0
----------------------------
origin: 房款的3%#3%
pred: 
score: 0
----------------------------
origin: 刘传龙
pred: 值得网友好奇的莫过于
score: 0
----------------------------
origin: 肥鸭
pred: 配料：肥鸭
score: 0.6666666666666666
----------------------------
origin: 800-1000左右#800-1000
pred: 人流手术，也就是六七百
score: 0
----------------------------
origin: 190座
pred: 据了解，c919，是中国继运-10
score: 0
----------------------------
origin: 500元左右#500元
pred: 有注册
score: 0
----------------------------

origin: 预计在明年#明年
pred: 时隔八年幻想三国志再出新作,在2015年的时候曾经就报道出在明年(2016
score: 0.17647058823529413
----------------------------
origin: 15岁
pred: 张丹峰儿子张浩锋，今年15
score: 0.15384615384615385
----------------------------
origin: 大概在200元到1000元#200元到1000元
pred: 计算和确定一个衣
score: 0
----------------------------
origin: 10010
pred: 话
score: 0
----------------------------
origin: 上午
pred: 喝绿茶最好的时间
score: 0
----------------------------
origin: 乌鲁木齐市的华凌市场
pred: 乌鲁木齐市的华凌
score: 0.888888888888889
----------------------------
origin: 一般在几百至千元不等#一般在几百至千元#几百至千元#几百至千元不等
pred: 的要求比较高
score: 0
----------------------------
origin: 6周
pred: 的复议申
score: 0
----------------------------
origin: 4000元
pred: 标准有所改变，
score: 0
----------------------------
origin: 6个月
pred: 《劳动法》第21条规定,劳动合同可
score: 0
----------------------------
origin: 6米
pred: 无缝管长度在3000~12000##mm，其中包括有
score: 0
----------------------------
origin: 发动后30秒至一分钟后
pred: ，在发动后30秒
score: 0.625
----------------------------
origin: 2017年7月2日#2017年7月2日每周日晚21:00
pred: 导读

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size(

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
input_q.size: torch.Size([16, 100])
encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 27, 27, 27],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  7,  7,  7]], device='cuda:1')
e_idx: tensor([17, 16, 29,  3,  4,  7, 16,  8,  4, 14,  9, 12, 17,  5, 16,  8],
       device='cuda:1')
pre answer is : 绝地求生大逃杀将在今日(3月23日)
batch.answers is: ['迅游国际网游加速器']
pre answer is : 乘车路线:乘820、98##6、97##4、通州
batch.answers is: ['亦庄工业园站下车即到', '亦庄工业园站', '北京大兴区亦庄经济开发区经海三路']
pre answer is : 国内
batch.answers is: ['今年年中']
pre answer is : 司棋[
batch.answers is: ['贾迎春的丫头']
pre answer is : 全国都在
batch.answers is: ['移动WLAN的附加产品']
pre answer is : 小皙护肤品补水
batch.answers is: ['238元']
pre answer is : 截止目前,本次比赛总奖金数已累积
batch.answers is: ['880万美元']
pre answer is : 病情分析:您好

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[  0,   0,   0,  ...,   0,   0,   0],
        [  0,   0,   0,  ...,   0,   0,   0],
        [  0,   0,   0,  ...,   0,   0,   0],
        ...,
        [  0,   0,   2,  ...,   9,   9,   9],
        [  0,   0,   0,  ...,   0,   0,   0],
        [  0,   0,   0,  ..., 152, 152, 152]], device='cuda:1')
e_idx: tensor([ 10,  17,  10,  18,  11,   9,   8,  17,  17, 139,  17,  18,  11,   9,
         14, 159], device='cuda:1')
pre answer is : 一条狗的使命9.0分
batch.answers is: ['一条狗的使命', '为了与你相遇']
pre answer is : 这里推荐大家种放风草。防风草一株成
batch.answers is: ['放风草', ' 防风草']
pre answer is : 上个月新开业的方特东
batch.answers is: ['方特东方神话主题乐园']
pre answer is : 苹果新一代iphone6拥有2个版本,一个是4.
batch

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 1, 2,  ..., 8, 8, 8],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([16,  7,  9, 14, 13,  7,  8, 10,  5,  7, 17, 10, 16,  4, 11, 17],
       device='cuda:1')
pre answer is : 西红柿的生长发育周期从种子播种、
batch.answers is: ['15---30天']
pre answer is : cctv##9记录频道伟
batch.answers is: ['伟大的卫国战争']
pre answer is : 务
batch.answers is: ['95543']
pre answer is : 的蒂芬
batch.answers is: ['大概500铢一个人', '500铢一个人']
pre answer is : 中国香港一直沿用欧洲的尺码
batch.answers is: ['43码']
pre answer is : .沪通铁路预计
batch.answers is: ['预计2017年', '2017年']
pre answer is : 我用腾讯手机管家
batch.answers is: ['腾讯手机管家']
pre a

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 7, 7, 7]], device='cuda:1')
e_idx: tensor([ 10,  11,   8,   9, 139, 111,   4,   5,  10,   9,   9,  12,  16,  10,
         13,  10], device='cuda:1')
pre answer is : 《欢乐颂2》是由东阳
batch.answers is: ['2017年5月11日']
pre answer is : 单家银行最高可以贷到24
batch.answers is: ['24万', '16万左右', '16万', '房产市值的八成']
pre answer is : 2、不错，特别是
batch.answers is: ['不错']
pre answer is : 阿里众包是阿里巴巴
batch.answers is: ['阿里众包']
pre answer is : 司
batch.answers is: ['2017年5月3日']
pre answer is : 
batch.answers is: ['30元']
pre answer is : 《末路情
batch.answers is: ['末路情途']
pre answer is : 大庆时代广
batch.answers is: ['大庆时代广场', '铁人纪念馆', '石人山']
pre answer is : 越南|新山机场

encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ..., 15, 15, 15],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  6,  6,  6],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([26, 13,  9, 17,  4,  6,  9,  8, 16, 16, 18, 17, 98, 29, 17, 13],
       device='cuda:1')
pre answer is : 人流手术，也就是六七百
batch.answers is: ['800-1000左右', '800-1000']
pre answer is : 据了解，c919，是中国继运-10
batch.answers is: ['190座']
pre answer is : 有注册
batch.answers is: ['500元左右', '500元']
pre answer is : 1小时：血糖6
batch.answers is: ['血糖≤7.8毫摩/升', '≤7.8毫摩/升']
pre answer is : 中式油烟
batch.answer

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([17,  9, 16,  8, 17,  8, 16, 18,  6,  4,  5,  8,  9,  8,  7,  5],
       device='cuda:1')
pre answer is : 如果你要用苹果6补差换购全新正品苹
batch.answers is: ['4000左右', '4000']
pre answer is : nba老鲍尔是今年nba选
batch.answers is: ['拉瓦尔·鲍尔']
pre answer is : 恐怖之源萨科390##0点券★★★★\x##0##a小
batch.answers is: ['9900点券']
pre answer is : 您可以看一下当地
batch.answers is: ['兰州中研白癜风研究院']
pre answer is : 的市民,可以提
batch.answers is: ['3个月']
pre answer is : 欢乐颂2更新到第
batch.answers is: ['52', '52集']
pre answer is : 60秒角度是量度角
batch.answers is

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 4,  8,  8,  4, 13,  8, 11,  7,  4, 13, 18, 10, 17,  5,  9,  9],
       device='cuda:1')
pre answer is : 家居装修
batch.answers is: ['木地板']
pre answer is : 高压锅炖排骨一般
batch.answers is: ['一般20分钟左右', '一般20分钟', '20分钟', '20分钟左右']
pre answer is : 荣耀5##x畅玩版与
batch.answers is: ['大概会在11月1日正式开售', '11月1日正式开售', '11月1日']
pre answer is : 欢乐颂电
batch.answers is: ['曲筱绡']
pre answer is : 通过实名认证的用户发送的红
batch.answers is: ['200元', '200']
pre answer is : 冰点脱毛是目前最
batch.answers is: ['冰点半导体激光脱毛仪器']
pre answer is : 证金比例一
ba

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  2,  ...,  3,  3,  3],
        [ 0,  0,  0,  ..., 10, 10, 10],
        ...,
        [ 0,  0,  0,  ...,  8,  8,  8],
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ..., 17, 17, 17]], device='cuda:1')
e_idx: tensor([ 10,   9,  16,  17,   9,   6,  18,  10,   7,  16,   5,  16,  10, 120,
         16,  51], device='cuda:1')
pre answer is : 您好,成都西南男科医
batch.answers is: ['成都西南男科医院']
pre answer is : 你怎么保养一
batch.answers is: ['一次450左右']
pre answer is : 你的咨询,正
batch.answers is: ['90-140/60-90mmhg之间', '90-140/60-90mmhg']
pre answer is : 早在情人
batch.answers is: ['预计会是710点券', '710点券', '788点券']
pre answer is : 小米手机便签存放在
batch.answers is: ['MIUI/notes文件夹']
pre answer is : 白起，我国战
batch.answers is: ['白起']
pre answer is : 美赞臣三阶段是180毫升四勺奶粉,勺子比
batch.answers is: ['180毫升四勺

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  1,  2,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ..., 12, 12, 12],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  8,  8,  8]], device='cuda:1')
e_idx: tensor([ 9, 16,  5,  4, 12,  4,  5, 21, 13, 16, 98, 17,  8, 22,  8, 16],
       device='cuda:1')
pre answer is : 国际儿童节，简称儿
batch.answers is: ['6月1日']
pre answer is : 款平台的审核标
batch.answers is: ['1到3个工作日']
pre answer is : 谢童的扮演
batch.answers is: ['邓伦']
pre answer is : 泡制10天
batch.answers is: ['10天后']
pre ans

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  8,  8,  8],
        [ 0,  0,  0,  ..., 10, 10, 10],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  7,  7,  7],
        [ 0,  0,  2,  ..., 12, 12, 12]], device='cuda:1')
e_idx: tensor([18, 16, 12, 18,  5, 17,  6,  5, 32, 12,  8, 14,  8,  3, 13, 16],
       device='cuda:1')
pre answer is : 度，就属于高度近
batch.answers is: ['低于600度']
pre answer is : 1200抽到此皮肤,还
batch.answers is: ['1000-2000元以内']
pre answer is : 般是
batch.answers is: ['1匹', '1匹空调']
pre answer is : 已公
batch.answers is: ['1月13日-16日']
pre answer is : 腰痛是以腰
batch.answers is: ['骨科']
pre answer is : 病情分析:你好，你的情况属于正常的
batch.answers is: ['10几个']
pre answer is : ;世界上最长
batch.answers is: ['绿茸线

encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 8, 8, 8],
        ...,
        [0, 0, 0,  ..., 9, 9, 9],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:1')
e_idx: tensor([ 18,  11,  11,  14,  16,  15,  14,   6,   8, 214,  16,  13,  12,  18,
          7,   4], device='cuda:1')
pre answer is : 专家指出，宫颈糜烂治疗时间应选在月经
batch.answers is: ['月经干净后3-7日']
pre answer is : 下午3点至5点是最佳运
batch.answers is: ['下午3点至5点']
pre answer is : ，价格
batch.answers is: ['美利达公爵600']
pre answer is : 网
batch.answers is: ['⑴400--100--8087 〔2〕400-9967-698']
pre answer is : ##mm*127##mm.
batch.answers is: ['89mm*127mm']
pre answer is : 发票的综合税点
batch.answers is: ['4.7%左右', '4.7%']
pre answer is : 器，性能强
batch.answers is: ['4代']
pre answer is : 跖疣，它是由
batch.answers is: ['祛疣ling药膏治']
pr

encoder_out11 size: torch.Size([16, 100, 1024])
x.size: torch.Size([16, 100, 256])
encoder_out size: torch.Size([16, 100, 256])
input_q.size: torch.Size([16, 400])
encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  1,  2,  ..., 12, 12, 12],
        [ 0,  0,  2,  ..., 11, 11, 11],
        ...,
        [ 0,  0,  0,  ..., 11, 11, 11],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 9, 16, 29, 15, 10,  4, 17,  6,  5,  8, 10,  4,  6, 18,  7,  6],
       device='cuda:1')
pre answer is : t##p-link路由器的初
batch.answers is: ['admin']
pre answer is : 温度的，
batch.answers is: ['没有固定温度']
pre answer is : 皮带的时间要求都不一致，普遍在6万至
batch.answers is: ['6万至10万公里']
pre answer is : 上市的问题总结
batch.answe

encoder_out11 size: torch.Size([16, 400, 1024])
x.size: torch.Size([16, 400, 256])
encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  9,  9,  9],
        [ 0,  0,  0,  ...,  9,  9,  9],
        ...,
        [ 0,  0,  0,  ..., 10, 10, 10],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([ 9, 17, 14,  4,  5,  9,  8, 15,  9, 10, 17, 16,  9, 14,  5, 18],
       device='cuda:1')
pre answer is : 市场上的防腐木用材
batch.answers is: ['1800-2000左右', '1800-2000']
pre answer is : 2022年09月10日-2022
batch.answers is: ['2022年09月10日-2022年09月25日']
pre answer is : 于6000米的只
batch.answers is: ['11034米', '11034']
pre answer is : 小马真名
batch.answers is: ['马彦毅']
pre answer is : 小猫咪出生
batch.answers is: ['出生一个月后']
pre answer is : googlemap只适合在美国有
batch.answers is: ['Sygic']


encoder_out size: torch.Size([16, 400, 256])
cq_attn torch.Size([16, 400, 100])
qc_attn torch.Size([16, 1, 400])
QCAttention x.size== torch.Size([16, 400, 768])
s_idx: tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0,  0,  2,  ..., 13, 13, 13],
        [ 0,  0,  0,  ...,  0,  0,  0]], device='cuda:1')
e_idx: tensor([15, 16, 16,  4,  6, 98,  8, 16,  5,  8,  3,  4, 98, 12, 22, 29],
       device='cuda:1')
pre answer is : 据悉,《寒武纪》由周雨彤、侯明
batch.answers is: ['5月8日24点', '每周二至周五']
pre answer is : 油钱6000(算一年1##w公里,每公里大
batch.answers is: ['约等于2W', '2W']
pre answer is : 小黄鸭很不错啊～喜欢你点映137##8万
batch.answers is: ['2.7亿']
pre answer is : 女警手长
batch.answers is: ['女警']
pre answer is : 空中客车a3##80
batch.answers is: ['555名', '555']
pre answer is : 生产日期pd即product##iondate有效期gp即g##ua##rant##eepe##rio##d生产日期是指商品在生产线上完成所有工序，经过检验并包装成为可在市场上销售的成品时的日期和时间。有效期是指在规定的贮藏条件下质量能够符合规定要求的

score: 0
----------------------------
origin: 应付账款周转天数=360/应付账款周转率
pred: 成本/应
score: 0.09523809523809523
----------------------------
origin: 100万以上
pred: 殊要求的烘干场所
score: 0
----------------------------
origin: 11%
pred: 务总局关于
score: 0
----------------------------
origin: 麦顿金典天然粮
pred: 麦顿金典
score: 0.7272727272727273
----------------------------
origin: 至少8000万到9000万之间#至少8000万到9000万
pred: 有人统计过，饿死了至少8000万到9000
score: 0.5714285714285714
----------------------------
origin: 农村商业银行#信用社银行
pred: 农村信用社
score: 0.6
----------------------------
origin: 没有具体的分数线
pred: 考分数线+80分。若
score: 0.39999999999999997
----------------------------
origin: 1070万
pred: 巴巴成功登陆纽交
score: 0
----------------------------
origin: 238.03美元
pred: 表现受到市
score: 0
----------------------------
origin: 极速前进第三季
pred: 《极速前进第三季》是
score: 0.9333333333333333
----------------------------
origin: 6个
pred: 长春市现辖
score: 0
----------------------------
origin: 五年之后
pred: 法:无法清除,必
score: 0
----------------------------
origin: 奔腾E8700
pred:

pred: 空中客车a3##80
score: 0
----------------------------
origin: GP
pred: 生产日期pd即product##iondate有效期gp即g##ua##rant##eepe##rio##d生产日期是指商品在生产线上完成所有工序，经过检验并包装成为可在市场上销售的成品时的日期和时间。有效期是指在规定的贮藏条件下质量能够符合规定要求的期限。
score: 0.023255813953488372
----------------------------
origin: 魅蓝note3
pred: 魅蓝note##3性价比更
score: 0.4
----------------------------
origin: 2017年7月7日
pred: 《加勒比海盗5》原计划定于2015年7
score: 0.2
----------------------------
origin: 十一章
pred: 剑灵主线任
score: 0
----------------------------
origin: 高德#高德地图
pred: 高德是中国领先的
score: 0.4
----------------------------
origin: 飞利浦
pred: oled电视
score: 0
----------------------------
origin: 464医院
pred: 46##4医院
score: 0.6666666666666666
----------------------------
origin: 下周
pred: 青年节啊海贼王漫画86##4话什么时候更新86##4话情报剧情预测&#58##94##2;&#58##94##3;来源：闽南网发布：2017-05-0209:21:01五一长假在日本也是黄金周放假的时候，所以《海贼王》漫画下一话86##4话将在下周更新，
score: 0.05633802816901409
----------------------------
origin: 0.27—0.4mm
pred: 生长速度头发是毛发中生长
score: 0
----------------------------
origin: 150-280元左右#150-28