In [27]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from transformers.modeling_utils import (WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
                             SequenceSummary, PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits)
from transformers import XLNetTokenizer, XLNetForSequenceClassification, XLNetPreTrainedModel, XLNetModel
from torch.nn import CrossEntropyLoss, BCEWithLogitsLoss
from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score


import pandas as pd
import numpy as np
import random
from IPython.display import clear_output
import re
from utils import *
from tqdm.notebook import tqdm
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
class Dataset_MRR(Dataset):
    def __init__(self, mode, tokenizer):
        assert mode in ["test_2"]
        self.mode = mode
        self.df = pd.read_csv(mode + ".tsv", sep="\t").fillna("")
        self.len = len(self.df)
        self.tokenizer = tokenizer
        
    def __getitem__(self, idx):
        text_a, text_b, text_eval, label = self.df.iloc[idx, :].values
        label_tensor = torch.tensor(label)
            
        # sentence_a tokens
        word_pieces = []
        tokens_a = self.tokenizer.tokenize(text_a + '<SEP>')
        word_pieces += tokens_a
        len_a = len(tokens_a)
        
        # sentence_b tokens
        tokens_b = self.tokenizer.tokenize(text_b + '<SEP><CLS>')
        word_pieces += tokens_b
        len_b = len(word_pieces) - len_a
        
        # 將 token 序列轉換成索引序列
        ids = self.tokenizer.convert_tokens_to_ids(word_pieces)
        tokens_tensor = torch.tensor(ids)
        
        # 將第一句 token 位置設為 0，其他為 1 表示第二句
        segments_tensor = torch.tensor([0] * len_a + [1] * (len_b-1) + [2], 
                                        dtype=torch.long)
        
        return (tokens_tensor, segments_tensor, label_tensor, text_a, text_b, text_eval)
    
    def __len__(self):
        return self.len
    
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=True)

In [10]:
dataset = Dataset_MRR("test_2", tokenizer=tokenizer)

In [11]:
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence

def create_mini_batch(samples):
    tokens_tensors = [s[0] for s in samples]
    segments_tensors = [s[1] for s in samples]
    text_a = [s[3] for s in samples]
    text_b = [s[4] for s in samples]
    text_eval = [s[5] for s in samples]
    
    if samples[0][2] is not None:
        label_ids = torch.stack([s[2] for s in samples])
    else:
        label_ids = None
    
    # zero pad 到同一序列長度
    tokens_tensors = pad_sequence(tokens_tensors, 
                                  batch_first=True)
    segments_tensors = pad_sequence(segments_tensors, 
                                    batch_first=True)
    
    # attention masks，將 tokens_tensors 裡頭不為 zero padding
    # 的位置設為 1，讓 model 只關注這些位置的 tokens
    masks_tensors = torch.zeros(tokens_tensors.shape, 
                                dtype=torch.long)
    masks_tensors = masks_tensors.masked_fill(
        tokens_tensors != 0, 1)
    
    return tokens_tensors, segments_tensors, masks_tensors, label_ids, text_a, text_b, text_eval


# 初始化回傳訓練樣本的 DataLoader
# 利用 `collate_fn` 將 list of samples 合併成一個 mini-batch 
BATCH_SIZE = 1
testloader = DataLoader(dataset, batch_size=1, collate_fn=create_mini_batch)

In [34]:
def calculate(model, dataloader, tokenizer):
    total = len(dataloader)
    entail_total = 0
    entail_total_len = 0
    neutral_total = 0
    neutral_total_len = 0
    contradict_total = 0
    contradict_total_len = 0
    
    entail_correct = 0
    entail_correct_len = 0
    neutral_correct = 0
    neutral_correct_len = 0
    contradict_correct = 0
    contradict_correct_len = 0
    
    entail_MRR_c = 0.
    neutral_MRR_c = 0.
    contradict_MRR_c = 0.
    
    entail_MRR_inc = 0.
    neutral_MRR_inc = 0.
    contradict_MRR_inc = 0.
    
    model.eval()
    with torch.no_grad():
        data_iterator = tqdm(dataloader, desc='Iteration')
        for data in data_iterator:
            if next(model.parameters()).is_cuda:
                data = [t.to("cuda:0") for t in data if t is not None]
            # predict
            tokens_tensors, segments_tensors, masks_tensors = data[:3]
            sentence_a = data[4][0]
            sentence_b = data[5][0]
            eval_sentence = data[6][0]
            outputs = model(input_ids=tokens_tensors, 
                            token_type_ids=segments_tensors, 
                            attention_mask=masks_tensors)
            logits = outputs[0]
            _, pred = torch.max(logits.data, 1)
            
            # divide 3 class
            label = data[3]
            MRR, length = explainability_compare(model, tokenizer, sentence_a, sentence_b, eval_sentence)

            if label == torch.tensor([0]):
                entail_total += 1
                entail_total_len += length
                if pred == label:
                    entail_correct += 1
                    entail_correct_len += length
                    entail_MRR_c += MRR
                else:
                    entail_MRR_inc += MRR
            elif label == torch.tensor([1]):
                neutral_total += 1
                neutral_total_len += length
                if pred == label:
                    neutral_correct += 1
                    neutral_correct_len += length
                    neutral_MRR_c += MRR
                else:
                    neutral_MRR_inc += MRR
            else:
                contradict_total += 1
                contradict_total_len += length
                if pred == label:
                    contradict_correct += 1
                    contradict_correct_len += length
                    contradict_MRR_c += MRR
                else:
                    contradict_MRR_inc += MRR
    if contradict_correct_len == 0:
        contradict_correct += 1
        
                    
    
    return {
        'total':total,
        'total_MRR':round((entail_MRR_c+entail_MRR_inc+
                           neutral_MRR_c+neutral_MRR_inc+
                           contradict_MRR_c+contradict_MRR_inc)/total, 4),
        'total_acc':round((entail_correct+neutral_correct+contradict_correct)/total, 2),
        'total_mean_len':round((entail_total_len+neutral_total_len+contradict_total_len)/total, 1),
        'entail_total':entail_total,
        'entail_acc':round(entail_correct/entail_total, 2),
        'entail_mean_len':round(entail_total_len/entail_total, 1),
        'entail_MRR':round((entail_MRR_c+entail_MRR_inc)/entail_total, 4),
        'entail_correct':entail_correct,
        'entail_correct_mean_len':round(entail_correct_len/entail_correct, 1),
        'entail_MRR_c':round(entail_MRR_c/entail_correct, 4),
        'entail_incorrect':entail_total-entail_correct,
        'entail_incorrect_mean_len':round((entail_total_len-entail_correct_len)/(entail_total-entail_correct), 2),
        'entail_MRR_inc':round(entail_MRR_inc/(entail_total-entail_correct), 4),
        'neutral_total':neutral_total,
        'neutral_acc':round(neutral_correct/neutral_total, 2),
        'neutral_mean_len':round(neutral_total_len/neutral_total, 1),
        'neutral_MRR':round((neutral_MRR_c+neutral_MRR_inc)/neutral_total, 4),
        'neutral_correct':neutral_correct,
        'neutral_correct_mean_len':round(neutral_correct_len/neutral_correct, 1),
        'neutral_MRR_c':round(neutral_MRR_c/neutral_correct, 4),
        'neutral_incorrect':neutral_total-neutral_correct,
        'neutral_incorrect_mean_len':round((neutral_total_len-neutral_correct_len)/(neutral_total-neutral_correct), 2),
        'neutral_MRR_inc':round(neutral_MRR_inc/(neutral_total-neutral_correct), 4),
        'contradict_total':contradict_total,
        'contradict_acc':round(contradict_correct/contradict_total, 2),
        'contradict_mean_len':round(contradict_total_len/contradict_total, 1),
        'contradict_MRR':round((contradict_MRR_c+contradict_MRR_inc)/contradict_total, 4),
        'contradict_correct':contradict_correct,
        'contradict_correct_mean_len':round(contradict_correct_len/contradict_correct, 1),
        'contradict_MRR_c':round(contradict_MRR_c/contradict_correct, 4),
        'contradict_incorrect':contradict_total-contradict_correct,
        'contradict_incorrect_mean_len':round((contradict_total_len-contradict_correct_len)/(contradict_total-contradict_correct), 2),
        'contradict_MRR_inc':round(contradict_MRR_inc/(contradict_total-contradict_correct), 4),
    }
        

In [16]:
%%time
model = torch.load('contra_63_24.pkl', map_location=torch.device('cpu'))
model_multi_result = calculate(model, testloader, tokenizer)
model_multi_result

Iteration: 100%|██████████| 540/540 [22:57<00:00,  2.55s/it]

CPU times: user 40min 35s, sys: 27 s, total: 41min 2s
Wall time: 22min 57s





{'total': 540,
 'total_MRR': 0.0252,
 'total_acc': 0.57,
 'total_mean_len': 243.3,
 'entail_total': 270,
 'entail_acc': 0.69,
 'entail_mean_len': 259.9,
 'entail_MRR': 0.0286,
 'entail_correct': 187,
 'entail_correct_mean_len': 247.3,
 'entail_MRR_c': 0.0302,
 'entail_incorrect': 83,
 'entail_incorrect_mean_len': 288.29,
 'entail_MRR_inc': 0.0249,
 'neutral_total': 191,
 'neutral_acc': 0.57,
 'neutral_mean_len': 205.9,
 'neutral_MRR': 0.0221,
 'neutral_correct': 108,
 'neutral_correct_mean_len': 206.7,
 'neutral_MRR_c': 0.0219,
 'neutral_incorrect': 83,
 'neutral_incorrect_mean_len': 204.88,
 'neutral_MRR_inc': 0.0223,
 'contradict_total': 79,
 'contradict_acc': 0.19,
 'contradict_mean_len': 277.0,
 'contradict_MRR': 0.0216,
 'contradict_correct': 15,
 'contradict_correct_mean_len': 243.4,
 'contradict_MRR_c': 0.0245,
 'contradict_incorrect': 64,
 'contradict_incorrect_mean_len': 284.83,
 'contradict_MRR_inc': 0.0209}

In [15]:
%%time
model_single = torch.load('single_056.pkl',map_location=torch.device('cpu'))
model_single_result = calculate(model_single, testloader, tokenizer)
model_single_result

Iteration: 100%|██████████| 540/540 [22:37<00:00,  2.51s/it]

CPU times: user 40min 6s, sys: 26.9 s, total: 40min 32s
Wall time: 22min 38s





{'total': 540,
 'total_MRR': 0.0228,
 'total_acc': 0.54,
 'total_mean_len': 243.3,
 'entail_total': 270,
 'entail_acc': 0.49,
 'entail_mean_len': 259.9,
 'entail_MRR': 0.026,
 'entail_correct': 133,
 'entail_correct_mean_len': 232.5,
 'entail_MRR_c': 0.0293,
 'entail_incorrect': 137,
 'entail_incorrect_mean_len': 286.46,
 'entail_MRR_inc': 0.0228,
 'neutral_total': 191,
 'neutral_acc': 0.74,
 'neutral_mean_len': 205.9,
 'neutral_MRR': 0.02,
 'neutral_correct': 142,
 'neutral_correct_mean_len': 204.8,
 'neutral_MRR_c': 0.02,
 'neutral_incorrect': 49,
 'neutral_incorrect_mean_len': 208.98,
 'neutral_MRR_inc': 0.0198,
 'contradict_total': 79,
 'contradict_acc': 0.24,
 'contradict_mean_len': 277.0,
 'contradict_MRR': 0.0185,
 'contradict_correct': 19,
 'contradict_correct_mean_len': 353.6,
 'contradict_MRR_c': 0.0168,
 'contradict_incorrect': 60,
 'contradict_incorrect_mean_len': 252.7,
 'contradict_MRR_inc': 0.0191}

In [39]:
%%time
model_pretrained = XLNetForSequenceClassification.from_pretrained('xlnet-base-cased',output_attentions=True,
                                                                  num_labels=3)
model_pretrained_result = calculate(model_pretrained, testloader, tokenizer)
model_pretrained_result

HBox(children=(IntProgress(value=0, description='Iteration', max=540, style=ProgressStyle(description_width='i…


CPU times: user 40min 46s, sys: 26 s, total: 41min 12s
Wall time: 23min 13s


{'total': 540,
 'total_MRR': 0.0224,
 'total_acc': 0.37,
 'total_mean_len': 243.3,
 'entail_total': 270,
 'entail_acc': 0.46,
 'entail_mean_len': 259.9,
 'entail_MRR': 0.0249,
 'entail_correct': 124,
 'entail_correct_mean_len': 280.4,
 'entail_MRR_c': 0.0234,
 'entail_incorrect': 146,
 'entail_incorrect_mean_len': 242.44,
 'entail_MRR_inc': 0.0262,
 'neutral_total': 191,
 'neutral_acc': 0.29,
 'neutral_mean_len': 205.9,
 'neutral_MRR': 0.0203,
 'neutral_correct': 55,
 'neutral_correct_mean_len': 204.3,
 'neutral_MRR_c': 0.023,
 'neutral_incorrect': 136,
 'neutral_incorrect_mean_len': 206.56,
 'neutral_MRR_inc': 0.0192,
 'contradict_total': 79,
 'contradict_acc': 0.29,
 'contradict_mean_len': 277.0,
 'contradict_MRR': 0.0191,
 'contradict_correct': 23,
 'contradict_correct_mean_len': 259.7,
 'contradict_MRR_c': 0.02,
 'contradict_incorrect': 56,
 'contradict_incorrect_mean_len': 284.04,
 'contradict_MRR_inc': 0.0187}

In [17]:
import csv
#with open('output.csv', 'w', newline='') as csvfile:

with open('multi_252.csv', 'w') as f:
    w = csv.DictWriter(f, model_multi_result.keys())
    w.writeheader()
    w.writerow(model_multi_result)

# with open('single.csv', 'w') as f:
#     w = csv.DictWriter(f, model_single_result.keys())
#     w.writeheader()
#     w.writerow(model_single_result)

In [24]:
sentence_a = """losail qatar afp torrential rain caused the seasonopening qatar motogp to be cancelled on sunday leaving officials and teams in a frenzy before deciding to race on monday instead at this floodlit desert venue. monsoonlike conditions accompanied by swirling winds arrived just moments before australia's casey stoner on pole position was due to lead defending world champion valentino rossi and the other riders away on the warmup lap. it's just unlucky with the weather said australian ducati rider stoner the 2007 world champion who was bidding for a third successive win here."""
sentence_b = "valentino rossi won the seasonopening qatar motogp."
test_sentence_a = """torrential rain caused the seasonopening qatar motogp to be cancelled"""



In [25]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True)
input_ids = inputs['input_ids'].to(device)
input_ids.squeeze()
tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())
token_type_ids = inputs['token_type_ids'].to(device)

In [38]:
first = model(input_ids, token_type_ids=token_type_ids)[-1]

In [35]:
model.eval()
with torch.no_grad():
    attention = model(input_ids, token_type_ids=token_type_ids)[-1]

In [39]:
attn = format_attention(attention, tokens)  
tokens = format_special_chars(tokens)
sentence_b_start = token_type_ids[0].tolist().index(1)
slice_a = slice(0, sentence_b_start)
slice_b = slice(sentence_b_start, len(tokens))
attn_data = attn[:, :, slice_a, slice_b]
sentence_a_tokens = tokens[slice_a]
sentence_b_tokens = tokens[slice_b]
pair = pair_match(sentence_a_tokens, sentence_b_tokens, attn_data=attn_data)
pair = sorted(pair, key=lambda pair: pair[2], reverse=True)
pair = pair_without_score(pair)
pair

[('season', 'season'),
 ('opening', 'opening'),
 ('moto', 'moto'),
 ('p', 'p'),
 ('g', 'g'),
 ('ino', 'ino'),
 ('tar', 'tar'),
 ('ossi', 'ossi'),
 ('r', 'r'),
 ('qa', 'qa'),
 ('valent', 'valent'),
 ('tar', 'tar'),
 ('qa', 'qa'),
 ('season', 'opening'),
 ('win', 'won'),
 ('p', 'p'),
 ('the', 'the'),
 ('g', 'moto'),
 ('p', '.'),
 ('.', '.'),
 ('opening', 'season'),
 ('defending', 'won'),
 ('a', 'won'),
 ('for', 'won'),
 ('p', '.'),
 ('af', '.'),
 ('defending', 'the'),
 ('bidding', 'won'),
 ('g', 'p'),
 ('.', 'won'),
 ('p', 'moto'),
 ('tar', 'qa'),
 ('champion', 'won'),
 ('defending', '.'),
 ('qa', 'tar'),
 ('p', 'g'),
 ('qa', 'opening'),
 ('world', '.'),
 ('up', 'opening'),
 ('torrential', '.'),
 ('tar', 'moto'),
 ("'", '.'),
 ('season', 'the'),
 ('case', '.'),
 ('.', '.'),
 ('champion', 'won'),
 ('world', 'the'),
 ('position', '.'),
 ('sun', '.'),
 ('champion', '.'),
 ('the', 'won'),
 ('riders', '.'),
 ('the', '.'),
 ('opening', 'the'),
 ('and', 'ossi'),
 ('it', '.'),
 ('world', 'won'),

In [5]:
class XLNetForMultiSequenceClassification(XLNetPreTrainedModel):

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = 3
        self.num_labels_3way = 3
        self.num_labels_multi = 5
        
        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        self.logits_proj_3way = nn.Linear(config.d_model, self.num_labels_3way)
        self.logits_proj_multi = nn.Linear(config.d_model, self.num_labels_multi)
        
        self.weights_3way = [1, 1.5, 3]
        self.weights_multi = [4, 2, 4, 2, 2]
        self.class_weights_3way = torch.FloatTensor(self.weights_3way).to(device)
        self.class_weights_multi = torch.FloatTensor(self.weights_multi).to(device)
        
        self.init_weights()
        

    def forward(self, input_ids, attention_mask=None, mems=None, perm_mask=None, target_mapping=None,
                token_type_ids=None, input_mask=None, head_mask=None, labels=None, inputs_embeds=None):
        transformer_outputs = self.transformer(input_ids,
                                               attention_mask=attention_mask,
                                               mems=mems,
                                               perm_mask=perm_mask,
                                               target_mapping=target_mapping,
                                               token_type_ids=token_type_ids,
                                               input_mask=input_mask, 
                                               head_mask=head_mask,
                                               inputs_embeds=inputs_embeds)

        output = transformer_outputs[0]
        output = self.sequence_summary(output)
        
        if labels is None:
            logits = self.logits_proj_3way(output)
            outputs = (logits,) + transformer_outputs[1:]

        if labels is not None:
            task_check = 0
        
            if labels.size() == torch.Size([1]):
                logits_3way = self.logits_proj_3way(output)
                outputs = (logits_3way,) + transformer_outputs[1:]
                task_check = 1
            else:
                logits_multi = self.logits_proj_multi(output)
                outputs = (logits_multi,) + transformer_outputs[1:]

            if task_check:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits_3way.view(-1, self.num_labels_3way), labels.view(-1)).to(device)
            else:
                loss_fct = BCEWithLogitsLoss(pos_weight=self.class_weights_multi)
                loss = loss_fct(logits_multi.view(-1, self.num_labels_multi), labels).to(device)
            outputs = (loss,) + outputs
            
        return outputs

In [192]:
model2 = torch.load('test.pkl',map_location=torch.device('cpu'))

In [200]:
model_single = torch.load('acc_0.5_complete.pkl',map_location=torch.device('cpu'))

In [31]:
import torch
import random
from torch.utils.data import Dataset
from torch.utils.data.dataset import ConcatDataset
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence

def format_special_chars(tokens):
    return [t.replace('Ġ', ' ').replace('▁', ' ').replace('</w>', '').replace(' ', '') for t in tokens]

def format_attention(attention, tokens):
    """ Set special token <sep>, <cls> attention to zero and format the attention """
    # set special token's attention to zero
    for i, t in enumerate(tokens):
        if t in ("<sep>", "<cls>"):
            for layer_attn in attention:
                layer_attn[0, :, i, :] = 0
                layer_attn[0, :, :, i] = 0
    squeezed = []
    for layer_attention in attention:
        # 1 x num_heads x seq_len x seq_len
        if len(layer_attention.shape) != 4:
            raise ValueError("Wrong attention length, attention length must be 4")
        squeezed.append(layer_attention.squeeze(0))
    # num_layers x num_heads x seq_len x seq_len
    return torch.stack(squeezed)

def look_score(attn_data, index_a, index_b):
    """ Look pair attention score in layers, head """
    score = 0.
    for layer in attn_data:
        for head in layer:
            score_individual = head[index_a][index_b].tolist()
            score += score_individual
    return round(score, 3)

def pair_match(sentence_a_tokens, sentence_b_tokens, attn_data=None):
    """ Matching each token in sentence_a and sentence_b and making pairs """
    pairs = []
    for index_a in range(len(sentence_a_tokens)):
        for index_b in range(len(sentence_b_tokens)):
            if attn_data is not None:
                score = look_score(attn_data, index_a, index_b)
                pair = (sentence_a_tokens[index_a], sentence_b_tokens[index_b], score)
                # filter the special token
                if score != 0:
                    pairs.append(pair)
            else:
                # for evaluation pairs
                pair = (sentence_a_tokens[index_a], sentence_b_tokens[index_b])
                pairs.append(pair)
    return pairs

def pair_without_score(pair):
    """ Return pairs without score """
    pairs = []
    for token_a, token_b, score in pair:
        if token_a != '' and token_b != '':
            pair = (token_a, token_b)
            pairs.append(pair)
    return pairs

def MRR_calculate(pair_truth, pair_all):
    final_score = 0.
    for query in pair_truth:
        for response in range(len(pair_all)):
            if pair_all[response] == query:
                score = 1/(response+1)
                final_score += score
    final_score = final_score/len(pair_truth)
    return final_score

def MRR_mean(pair_truth, pair_all, top_k, times):
    """ Choose k tokens from tokens list for calculating MRR"""
    filtered = random.choices(pair_truth, k=top_k)
    final = 0.
    for i in range(times):
        score = MRR_calculate(filtered, pair_all)
        final += score
    final = final/times
    return final

def explainability_compare(model, tokenizer, sentence_a, sentence_b, test_sentence_a):
    """ Evaluating MRR between model and attention span"""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True)
    input_ids = inputs['input_ids'].to(device)
    input_ids.squeeze()
    tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())
    token_type_ids = inputs['token_type_ids'].to(device)
    
    model.eval()
    with torch.no_grad():
        attention = model(input_ids, token_type_ids=token_type_ids)[-1]
    
    attn = format_attention(attention, tokens)  
    tokens = format_special_chars(tokens)
    sentence_b_start = token_type_ids[0].tolist().index(1)
    slice_a = slice(0, sentence_b_start)
    slice_b = slice(sentence_b_start, len(tokens))
    attn_data = attn[:, :, slice_a, slice_b]
    sentence_a_tokens = tokens[slice_a]
    sentence_b_tokens = tokens[slice_b]
    pair = pair_match(sentence_a_tokens, sentence_b_tokens, attn_data=attn_data)
    pair = sorted(pair, key=lambda pair: pair[2], reverse=True)
    pair = pair_without_score(pair)
    
    test_inputs = tokenizer.encode_plus(test_sentence_a, sentence_b, return_tensors='pt', add_special_tokens=False)
    test_input_ids = test_inputs['input_ids']
    test_input_ids.squeeze()
    test_tokens = tokenizer.convert_ids_to_tokens(test_input_ids.squeeze().tolist())
    test_token_type_ids = test_inputs['token_type_ids']
    test_tokens = format_special_chars(test_tokens)
    test_sentence_b_start = test_token_type_ids[0].tolist().index(1)
    test_slice_a = slice(0, test_sentence_b_start)
    test_slice_b = slice(test_sentence_b_start, len(test_tokens))
    test_sentence_a_tokens = test_tokens[test_slice_a]
    test_sentence_b_tokens = test_tokens[test_slice_b]
    test_pair = pair_match(test_sentence_a_tokens, test_sentence_b_tokens, attn_data=None)

    return MRR_calculate(test_pair, pair), len(test_pair)

In [120]:
import xml.etree.ElementTree as ET

In [210]:
root = ET.parse('RTE5_test_AttnSpan.xml').getroot()
text = []
hypothesis = []
entailment = []
attention = []

label_mapping = {'ENTAILMENT': 0, 'UNKNOWN': 1, 'CONTRADICTION': 2}

replacement = {"hasn't": 'has not', 
               "couldn't": 'could not', 
               "wasn't": 'was not', 
               "weren't": 'were not', 
               "doesn't": 'does not',
               "don't": 'do not',
               '"': '',
              }

for type_tag in root.findall('pair'):
    
    e = type_tag.get('entailment')
    t = type_tag.find('t').text
    t = t.lower()
    for word, rep in replacement.items():
        t = t.replace(word.lower(), rep)
    t = re.sub(r"([\(\)\[\]\{\}!-])", "", t)
    
    h = type_tag.find('h').text
    h = h.lower()
    for word, rep in replacement.items():
        h = h.replace(word.lower(), rep)
    h = re.sub(r"([\(\)\[\]\{\}!-])", "", h)
    
    a = type_tag.find('a').text
    a = a.lower()
    for word, rep in replacement.items():
        h = h.replace(word.lower(), rep)
    a = re.sub(r"([\(\)\[\]\{\}!-])", "", a)
    
    
    text.append(t)
    hypothesis.append(h)
    attention.append(a)
    entailment.append(label_mapping[e])
    
df_test = pd.DataFrame((zip(text, hypothesis, attention, entailment)), columns=['text_a', 'text_b', 'eval_text','label'])
df_test.to_csv("test_2.tsv", sep="\t", index=False)

In [None]:
def analyze(data):
    
    # Total
    total,
    total_MRR,
    total_acc,
    total_mean_len,
    # Entail
    entail_total,
    entail_acc,
    entail_mean_len,
    entail_MRR,
    entail_correct,
    entail_correct_mean_len,
    entail_MRR_c,
    entail_incorrect,
    entail_incorrect_mean_len,
    entail_MRR_inc,
    # Neutral
    neutral_total,
    neutral_acc,
    neutral_mean_len,
    neutral_MRR,
    neutral_correct,
    neutral_correct_mean_len,
    neutral_MRR_c,
    neutral_incorrect,
    neutral_incorrect_mean_len,
    neutral_MRR_inc,
    # Contradict
    contradict_total,
    contradict_acc,
    contradict_mean_len,
    contradict_MRR,
    contradict_correct,
    contradict_correct_mean_len,
    contradict_MRR_c,
    contradict_incorrect,
    contradict_incorrect_mean_len,
    contradict_MRR_inc = (data[key] for key in data)
      
    print("""
    ------------------------------------------------------------------------------------------------------
    |       Total: %g      |        Acc: %g       |        MRR: %g        |      Mean_length: %g       |
    ------------------------------------------------------------------------------------------------------
    |            ENTAILMENT            |             NEUTRAL           |        CONTRADICTION            |
    ------------------------------------------------------------------------------------------------------
    |
    """)

In [54]:
from transformers import pipeline

nlp = pipeline('feature-extraction')
a = nlp('Leila is a little pig')
len(a[0].size())

HBox(children=(IntProgress(value=0, description='Downloading', max=230, style=ProgressStyle(description_width=…




AttributeError: 'list' object has no attribute 'size'