In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from transformers.modeling_utils import (WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
                             SequenceSummary, PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits)
from transformers import XLNetTokenizer, XLNetForSequenceClassification, XLNetPreTrainedModel, XLNetModel
from torch.nn import CrossEntropyLoss, BCEWithLogitsLoss
from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from torch.utils.data.dataset import ConcatDataset
from XLNet import Dataset_3Way, Dataset_multi, SquadExample, squad_convert_example_to_features

import pandas as pd
import numpy as np
import random
from IPython.display import clear_output
import re
from tqdm.notebook import tqdm, trange

In [61]:
class Dataset_Span_Detection(Dataset):
    
    def __init__(self, mode, tokenizer):
        assert mode in ["data/train_span_detection"]
        self.mode = mode
        self.df = pd.read_csv(mode + ".tsv", sep="\t").fillna("")
        self.len = len(self.df)
        self.tokenizer = tokenizer
        
    def __getitem__(self, idx):
        context_text, question_text, answer_text, start_position_character = self.df.iloc[idx,:].values
        
        example = SquadExample(
            question_text=question_text,
            context_text=context_text,
            answer_text=answer_text,
            start_position_character=start_position_character
        )

        features = squad_convert_example_to_features(example,
                                                     max_seq_length=384,
                                                     doc_stride=128,
                                                     max_query_length=128
                                                    )
        input_ids = torch.tensor(features[0].input_ids)
        attention_mask = torch.tensor(features[0].attention_mask)
        token_type_ids = torch.tensor(features[0].token_type_ids)
        start_position = torch.tensor(features[0].start_position)
        end_position = torch.tensor(features[0].end_position)
        cls_index = torch.tensor(features[0].cls_index)
        p_mask = torch.tensor(features[0].p_mask)
        
        return input_ids, attention_mask, token_type_ids, start_position, end_position, cls_index, p_mask
    
    def __len__(self):
        return self.len

In [62]:
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=True)

trainset_3way = Dataset_3Way("data/RTE5_train", tokenizer=tokenizer)
trainset_multi = Dataset_multi("data/train_multi_label", tokenizer = tokenizer)
#trainset_span = Dataset_Span_Detection("data/train_span_detection", tokenizer=tokenizer)
#trainset = ConcatDataset([trainset_3way, trainset_multi, trainset_span])
trainset = ConcatDataset([trainset_3way, trainset_multi])
#trainset = Dataset_3Way("data/RTE5_train", tokenizer=tokenizer)

In [71]:
trainset[2]

(tensor([[ 2011,    47,    72,  1390,    29,  1220,  3469,    25,    18,  7765,
             28,  2279, 24512,   813,  2349, 23397,   780,   431, 15261,    41,
             50,   163,  1060,    76,  1068,   456,     9,    36,   909,    99,
           1645,   891,    25,    17,  3654,   577,  8917,   365,    99,    24,
          13456,    25,    18,  1808,  2182,  7759,    90,   442,   108,    18,
          10733,    30,   194,  2279,     9,    18,  2510,  1525,    70,   100,
            139,   754,   526,    21,   891,    42,  9875,    23,    54,    50,
            655,    72,    25,  2282,     9,   365,    42,  3469,    55,   163,
          13782,    25,    24,  2175,  4140,    21, 14107,   162,     9,     4,
           2349, 23397,   780,   431, 15261, 19709,    25,    17,  3654,   577,
              9,     4,     3]]),
 tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  

In [44]:
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence

def create_mini_batch(samples):
    tokens_tensors = [s[0] for s in samples]
    segments_tensors = [s[1] for s in samples]
    masks_tensors = [s[2] for s in samples]
    if samples[0][3] is not None:
        label_ids = torch.stack([s[3] for s in samples])
    else:
        label_ids = None
        
    # zero pad 到同一序列長度
    tokens_tensors = pad_sequence(tokens_tensors, 
                                  batch_first=True)
    segments_tensors = pad_sequence(segments_tensors, 
                                    batch_first=True)
    masks_tensors = pad_sequence(masks_tensors, 
                                    batch_first=True)

    return tokens_tensors.squeeze(1), segments_tensors.squeeze(1), masks_tensors.squeeze(1), label_ids


# 初始化回傳訓練樣本的 DataLoader
# 利用 `collate_fn` 將 list of samples 合併成一個 mini-batch 

trainloader = DataLoader(trainset, batch_size=1,collate_fn=create_mini_batch, shuffle=True)

In [63]:
class XLNetForMultiSequenceClassification(XLNetPreTrainedModel):

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = 3
        self.num_labels_3way = 3
        self.num_labels_multi = 5
        
        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        self.logits_proj_3way = nn.Linear(config.d_model, self.num_labels_3way)
        self.logits_proj_multi = nn.Linear(config.d_model, self.num_labels_multi)
        self.weights_3way = [0.3, 0.5, 2]
        #self.weights_multi = [15, 10, 15, 5, 5]
        self.weights_multi = [6, 4, 6, 2, 2]
        self.class_weights_3way = torch.FloatTensor(self.weights_3way).cuda()
        self.class_weights_multi = torch.FloatTensor(self.weights_multi).cuda()
        
        self.init_weights()
        

    def forward(self, input_ids, attention_mask=None, mems=None, perm_mask=None, target_mapping=None,
                token_type_ids=None, input_mask=None, head_mask=None, labels=None, inputs_embeds=None):
        transformer_outputs = self.transformer(input_ids,
                                               attention_mask=attention_mask,
                                               mems=mems,
                                               perm_mask=perm_mask,
                                               target_mapping=target_mapping,
                                               token_type_ids=token_type_ids,
                                               input_mask=input_mask, 
                                               head_mask=head_mask,
                                               inputs_embeds=inputs_embeds)
    
        output = transformer_outputs[0]
        output = self.sequence_summary(output)
        
        if labels is None:
            logits = self.logits_proj_3way(output)
            outputs = (logits,) + transformer_outputs[1:]

        if labels is not None:
            task_check = 0
            if labels.size() == torch.Size([1]):
                logits_3way = self.logits_proj_3way(output)
                outputs = (logits_3way,) + transformer_outputs[1:]
                task_check = 1
            else:
                logits_multi = self.logits_proj_multi(output)
                outputs = (logits_multi,) + transformer_outputs[1:]

            if task_check:
                loss_fct = CrossEntropyLoss(weight=self.class_weights_3way)
                loss = loss_fct(logits_3way.view(-1, self.num_labels_3way), labels.view(-1)).cuda()
            else:
                loss_fct = BCEWithLogitsLoss(pos_weight=self.class_weights_multi)
                loss = loss_fct(logits_multi.view(-1, self.num_labels_multi), labels).cuda()
            outputs = (loss,) + outputs
            
        return outputs

In [64]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

PRETRAINED_MODEL_NAME = "xlnet-base-cased"
model = XLNetForMultiSequenceClassification.from_pretrained(PRETRAINED_MODEL_NAME,
                                                            output_attentions=True,
                                                            dropout=0.1)

In [65]:
from torch.optim import AdamW

param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5, eps=1e-8)

In [66]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device:", device)
model = model.to(device)

device: cuda:0


In [67]:
def get_predictions(model, dataloader, compute_acc=False):
    predictions = None
    correct = 0
    total = 0
      
    with torch.no_grad():
        
        for data in dataloader:
            
            if next(model.parameters()).is_cuda:
                data = [t.to("cuda:0") for t in data if t is not None]

            tokens_tensors, segments_tensors, masks_tensors = data[:3]
            outputs = model(input_ids=tokens_tensors, 
                            token_type_ids=segments_tensors, 
                            attention_mask=masks_tensors)
            logits = outputs[0]
            _, pred = torch.max(logits.data, 1)
            
            if compute_acc:
                labels = data[3]
                total += labels.size(0)
                correct += (pred == labels).sum().item()
                
            if predictions is None:
                predictions = pred
            else:
                predictions = torch.cat((predictions, pred))
    
    if compute_acc:
        acc = correct / total
        return predictions, acc
    return predictions

In [68]:
%%time
EPOCHS = 20
batch_size = 4
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=EPOCHS * ((len(trainloader)//batch_size)))
epochs_trained = 0

model.zero_grad()
train_iterator = trange(epochs_trained, EPOCHS, desc="Epoch")
set_seed(42)

for _ in train_iterator:
    epoch_iterator = tqdm(trainset, desc="Iteration")
    
    model.train()
    running_loss = 0.0
    batch_cnt = 1
    loss = torch.zeros(1).to(device)
    
    for step, data in enumerate(epoch_iterator):
        tokens_tensors, segments_tensors, masks_tensors, labels = [t.to(device) for t in data]
        
        # forward pass
        outputs = model(input_ids=tokens_tensors, 
                        token_type_ids=segments_tensors, 
                        attention_mask=masks_tensors, 
                        labels=labels)
        batch_cnt += 1
        loss = outputs[0]/batch_size
        loss.backward()
        
        if batch_cnt >= batch_size:
            optimizer.step()
            scheduler.step()
            model.zero_grad()
            batch_cnt = 0

        # 紀錄當前 batch loss
        running_loss += loss.item()
    epochs_trained += 1
        
    testset = Dataset_3Way("data/RTE5_test", tokenizer=tokenizer)
    testloader = DataLoader(testset, batch_size=1, 
                     collate_fn=create_mini_batch)
    predictions = get_predictions(model, testloader)

    df_pred = pd.DataFrame({"label": predictions.tolist()})
        
    pred_Y = df_pred['label'].values
    test_Y = pd.read_csv("data/RTE5_test.tsv", sep='\t').fillna("")['label'].values

    accuracy = accuracy_score(test_Y, np.array(pred_Y))
    precision = precision_score(test_Y, pred_Y, average='macro')
    recall = recall_score(test_Y, pred_Y, average='macro')
    fscore = f1_score(test_Y, pred_Y, average='macro')
    
    CNT = 0
    TOTAL = 0
    for i in range(len(test_Y)):
        if test_Y[i] == 2:
            TOTAL += 1
        else:
            pass
        if test_Y[i] == 2 and predictions[i] == 2:
            CNT += 1
    contra = round((CNT/TOTAL)*100,1)
    if contra > 20 and accuracy > 0.6:
        torch.save(model, "multi_%g, %g.pkl" % accuracy, CNT)
    print("Accuracy: %g\tPrecision: %g\tRecall: %g\tF-score: %g Loss: %g" % (accuracy, precision, recall, fscore, running_loss))
    print(contra)
    print("------------------------------------------")

HBox(children=(IntProgress(value=0, description='Epoch', max=20, style=ProgressStyle(description_width='initia…

HBox(children=(IntProgress(value=0, description='Iteration', max=718, style=ProgressStyle(description_width='i…



ValueError: Target size (torch.Size([])) must be the same as input size (torch.Size([1, 5]))

In [23]:
%%time
EPOCHS = 20
batch_size = 4
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=EPOCHS * ((len(trainloader)//batch_size)))
epochs_trained = 0

model.zero_grad()
train_iterator = trange(epochs_trained, EPOCHS, desc="Epoch")
set_seed(42)

for _ in train_iterator:
    epoch_iterator = tqdm(trainloader, desc="Iteration")
    
    model.train()
    running_loss = 0.0
    batch_cnt = 1
    loss = torch.zeros(1).to(device)
    
    for step, data in enumerate(epoch_iterator):
        tokens_tensors, segments_tensors, masks_tensors, labels = [t.to(device) for t in data]
        
        # forward pass
        outputs = model(input_ids=tokens_tensors, 
                        token_type_ids=segments_tensors, 
                        attention_mask=masks_tensors, 
                        labels=labels)
        batch_cnt += 1
        loss = outputs[0]/batch_size
        loss.backward()
        
        if batch_cnt >= batch_size:
            optimizer.step()
            scheduler.step()
            model.zero_grad()
            batch_cnt = 0

        # 紀錄當前 batch loss
        running_loss += loss.item()
    epochs_trained += 1
        
    testset = Dataset_3Way("data/RTE5_test", tokenizer=tokenizer)
    testloader = DataLoader(testset, batch_size=1, 
                     collate_fn=create_mini_batch)
    predictions = get_predictions(model, testloader)

    df_pred = pd.DataFrame({"label": predictions.tolist()})
        
    pred_Y = df_pred['label'].values
    test_Y = pd.read_csv("data/RTE5_test.tsv", sep='\t').fillna("")['label'].values

    accuracy = accuracy_score(test_Y, np.array(pred_Y))
    precision = precision_score(test_Y, pred_Y, average='macro')
    recall = recall_score(test_Y, pred_Y, average='macro')
    fscore = f1_score(test_Y, pred_Y, average='macro')
    
    CNT = 0
    TOTAL = 0
    for i in range(len(test_Y)):
        if test_Y[i] == 2:
            TOTAL += 1
        else:
            pass
        if test_Y[i] == 2 and predictions[i] == 2:
            CNT += 1
    contra = round((CNT/TOTAL)*100,1)
    if contra > 20 and accuracy > 0.6:
        torch.save(model, "multi_%g, %g.pkl" % accuracy, CNT)
    print("Accuracy: %g\tPrecision: %g\tRecall: %g\tF-score: %g Loss: %g" % (accuracy, precision, recall, fscore, running_loss))
    print(contra)
    print("------------------------------------------")
        


HBox(children=(IntProgress(value=0, description='Epoch', max=20, style=ProgressStyle(description_width='initia…

HBox(children=(IntProgress(value=0, description='Iteration', max=500, style=ProgressStyle(description_width='i…

RuntimeError: dimension mismatch for operand 0: equation 3 tensor 4

In [17]:
torch.save(model, "multi_%g, %g.pkl" % (accuracy, CNT))

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [14]:
predictions = get_predictions(model, dataloader_3way)

In [10]:
%%time
# 測試集
testset = Dataset_3Way("test", tokenizer=tokenizer)
testloader = DataLoader(testset, batch_size=1, 
                        collate_fn=create_mini_batch)

predictions = get_predictions(model, testloader)

# 將預測的 label id 轉回文字
index_map = {v: k for k, v in testset.label_map.items()}

df_pred = pd.DataFrame({"label": predictions.tolist()})

Wall time: 42.3 s


In [11]:
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

pred_Y = df_pred['label'].values
test_Y = pd.read_csv('test.tsv', sep='\t').fillna("")['label'].values

accuracy = accuracy_score(test_Y, np.array(pred_Y))
precision = precision_score(test_Y, pred_Y, average='macro')
recall = recall_score(test_Y, pred_Y, average='macro')
fscore = f1_score(test_Y, pred_Y, average='macro')

print("Accuracy: %g\tPrecision: %g\tRecall: %g\tF-score: %g" % (
    accuracy, precision, recall, fscore))

Accuracy: 0.575	Precision: 0.492086	Recall: 0.489153	F-score: 0.482597


In [1]:
torch.save(model, 'contra_61_28.pkl')

NameError: name 'torch' is not defined

In [5]:
model2 = torch.load('test.pkl',map_location=torch.device('cpu'))

In [75]:
from bertviz import bertviz

model_version = 'xlnet-base-cased'
#model_multi = model 
model_pretrained = XLNetModel.from_pretrained(model_version, output_attentions=True)
model_single = torch.load('acc_0.52_complete.pkl',map_location=torch.device('cpu'))
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased') 

AttributeError: Can't get attribute 'gelu' on <module 'transformers.modeling_xlnet' from '/Users/chenyutsai/opt/anaconda3/lib/python3.7/site-packages/transformers/modeling_xlnet.py'>

In [29]:
def explainability_compare(model, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b):
    inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True)
    input_ids = inputs['input_ids'].cuda()
    input_ids.squeeze()
    tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())
    token_type_ids = inputs['token_type_ids'].cuda()
    
    model.eval()
    with torch.no_grad():
        outputs = model(input_ids, token_type_ids=token_type_ids)[1]
    attention = model(input_ids, token_type_ids=token_type_ids)[-1]
    
    attn = format_attention(attention, tokens)  
    tokens = format_special_chars(tokens)
    sentence_b_start = token_type_ids[0].tolist().index(1)
    slice_a = slice(0, sentence_b_start)
    slice_b = slice(sentence_b_start, len(tokens))
    attn_data = attn[:, :, slice_a, slice_b]
    sentence_a_tokens = tokens[slice_a]
    sentence_b_tokens = tokens[slice_b]
    pair = pair_match(sentence_a_tokens, sentence_b_tokens, attn_data=attn_data)
    pair = sorted(pair, key=lambda pair: pair[2], reverse=True)
    pair = pair_return(pair)
    
    test_inputs = tokenizer.encode_plus(test_sentence_a, test_sentence_b, return_tensors='pt', add_special_tokens=False)
    test_input_ids = test_inputs['input_ids']
    test_input_ids.squeeze()
    test_tokens = tokenizer.convert_ids_to_tokens(test_input_ids.squeeze().tolist())
    test_token_type_ids = test_inputs['token_type_ids']
    test_tokens = format_special_chars(test_tokens)
    test_sentence_b_start = test_token_type_ids[0].tolist().index(1)
    test_slice_a = slice(0, test_sentence_b_start)
    test_slice_b = slice(test_sentence_b_start, len(test_tokens))
    test_sentence_a_tokens = test_tokens[test_slice_a]
    test_sentence_b_tokens = test_tokens[test_slice_b]
    test_pair = pair_match(test_sentence_a_tokens, test_sentence_b_tokens, attn_data=None)

    return MRR_calculate(test_pair, pair)

In [10]:
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=False)
input_ids = inputs['input_ids']
input_ids.squeeze()
tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())
token_type_ids = inputs['token_type_ids']
tokens = format_special_chars(tokens)

In [6]:
# sentence_a = 'sentence 1'
# sentence_b = 'sentence 2'
sentence_a = """a soyuz capsule carrying a russian cosmonaut, an american astronaut and u.s. billionaire tourist charles simonyi has docked at the international space station. russian cosmonaut gennady padalka manually guided the capsule to a stop ahead of schedule saturday two days after blasting off from the baikonur cosmodrome in kazakhstan. the crews of the capsule and the station will spend around three hours checking seals before opening the air locks and meeting up facetoface.
"""
sentence_b = "charles simonyi is a russian cosmonaut."

In [57]:
testset = Dataset_3Way("test_2", tokenizer=tokenizer)
testloader = DataLoader(testset, batch_size=1, collate_fn=create_mini_batch)

In [60]:
predictions = get_predictions(model2, testloader)

tensor([[0.5723, 0.1841, 0.1501]])
tensor([[ 0.7128, -1.1001, -0.2352]])
tensor([[ 1.5935, -1.3127, -0.5411]])


In [67]:
testset = Dataset_3Way("test_2", tokenizer=tokenizer)
testloader = DataLoader(testset, batch_size=1, collate_fn=create_mini_batch)
predictions = get_predictions(model2, testloader)

tensor([[ 0.1305,  0.1911, -0.3580]])
tensor([[ 0.5888, -0.6714, -0.3910]])
tensor([[ 0.8907, -0.8635, -0.5242]])


In [30]:
model2.eval()
with torch.no_grad():
    pred = model2(input_ids, token_type_ids=token_type_ids)[0]
print(pred)

tensor([[ 2.3032, -1.0553, -0.4880]])


In [15]:
inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True)

In [16]:
input_ids = inputs['input_ids'].cuda()
input_ids.squeeze()
tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())
token_type_ids = inputs['token_type_ids'].cuda()

In [29]:
with torch.no_grad():
    outputs = model_trained(input_ids, token_type_ids=token_type_ids)[1]

In [25]:
attention_trained = model_trained(input_ids, token_type_ids=token_type_ids)[-1]

In [30]:
input_ids = inputs['input_ids']
input_ids.squeeze()
tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())
token_type_ids = inputs['token_type_ids']

In [31]:
attention = model(input_ids, token_type_ids=token_type_ids)[-1]

In [32]:
input_id_list = input_ids[0].tolist() # Batch index 0
input_id_list
tokens = tokenizer.convert_ids_to_tokens(input_id_list) 

In [9]:
def format_attention(attention, tokens):
    for i, t in enumerate(tokens):
        if t in ("<sep>", "<cls>"):
            for layer_attn in attention:
                layer_attn[0, :, i, :] = 0
                layer_attn[0, :, :, i] = 0
    squeezed = []
    for layer_attention in attention:
        # 1 x num_heads x seq_len x seq_len
        squeezed.append(layer_attention.squeeze(0))
    # num_layers x num_heads x seq_len x seq_len
    return torch.stack(squeezed)

In [8]:
def format_special_chars(tokens):
    return [t.replace('Ġ', '').replace('▁', '').replace('</w>', '') for t in tokens]

In [14]:
def look_score(attn_data, index_a, index_b):
    score = 0.
    for layer in attn_data:
        for head in layer:
            score_individaul = head[index_a][index_b].tolist()
            score += score_individaul
    return round(score, 3)

In [15]:
def pair_match(sentence_a_tokens, sentence_b_tokens, attn_data=None):
    whole = []
    for token_a in sentence_a_tokens:
        index_a = sentence_a_tokens.index(token_a)
        for token_b in sentence_b_tokens:
            index_b = sentence_b_tokens.index(token_b)
            if attn_data is not None:
                score = look_score(attn_data, index_a, index_b)
                pair = (token_a, token_b, score)
                if score != 0:
                    whole.append(pair)
            else:
                pair = (token_a, token_b)
                whole.append(pair)
    return whole

In [37]:
attn = format_attention(attention, tokens)
attn_trained = format_attention(attention_trained, tokens)

In [38]:
tokens = format_special_chars(tokens)
sentence_b_start = token_type_ids[0].tolist().index(1)
slice_a = slice(0, sentence_b_start)
slice_b = slice(sentence_b_start, len(tokens))
attn_data = attn[:, :, slice_a, slice_b]
attn_data_trained = attn_trained[:, :, slice_a, slice_b]
sentence_a_tokens = tokens[slice_a]
sentence_b_tokens = tokens[slice_b]
pair = pair_match(sentence_a_tokens, sentence_b_tokens, attn_data=attn_data)
pair = sorted(pair, key=lambda pair: pair[2], reverse=True) 
pair_trained = pair_match(sentence_a_tokens, sentence_b_tokens, attn_data_trained)
pair_trained = sorted(pair_trained, key=lambda pair_trained: pair_trained[2], reverse=True) 

In [17]:
def pair_return(pair):
    lst = []
    for a, b, s in pair:
        p = (a, b)
        lst.append(p)
    return lst

In [None]:
pair = pair_return(pair)
pair_trained = pair_return(pair_trained)

In [374]:
import random

sampling = random.choices(pair_truth, k=4)
sampling

[('A', 'a'), ('A', 'a'), ('game', 'sport'), ('soccer', 'sport')]

In [45]:
test_sentence_a = """an american astronaut and u.s. billionaire tourist charles simonyi"""
test_sentence_b = 'charles simonyi is a russian cosmonaut'

inputs = tokenizer.encode_plus(test_sentence_a, test_sentence_b, return_tensors='pt', add_special_tokens=False)
input_ids = inputs['input_ids']
input_ids.squeeze()
test_tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())
test_token_type_ids = inputs['token_type_ids']
test_tokens = format_special_chars(test_tokens)
test_sentence_b_start = test_token_type_ids[0].tolist().index(1)
slice_a = slice(0, test_sentence_b_start)
slice_b = slice(test_sentence_b_start, len(test_tokens))
test_sentence_a_tokens = test_tokens[slice_a]
test_sentence_b_tokens = test_tokens[slice_b]
test_pair = pair_match(test_sentence_a_tokens, test_sentence_b_tokens, attn_data=None)

In [19]:
def MRR_calculate(pair_truth, pair_all):
    final_score = 0.
    for query in pair_truth:
        for response in range(len(pair_all)):
            if pair_all[response] == query:
                score = 1/(response+1)
                final_score += score
    final_score = final_score/len(pair_truth)
    return final_score

def MRR_mean(pair_truth, pair_all, top_k, times):
    filtered = random.choices(pair_truth, k=top_k)
    final = 0.
    for i in range(times):
        score = MRR_calculate(filtered, pair_all)
        final += score
    final = final/times
    return final

# first = MRR_mean(test_pair, pair, 6, 1000)
# second = MRR_mean(test_pair, pair_trained, 6, 1000)
# print(first, second)

# Contradiction

In [33]:
sentence_a = """a soyuz capsule carrying a russian cosmonaut, an american astronaut and u.s. billionaire tourist charles simonyi has docked at the international space station. russian cosmonaut gennady padalka manually guided the capsule to a stop ahead of schedule saturday two days after blasting off from the baikonur cosmodrome in kazakhstan. the crews of the capsule and the station will spend around three hours checking seals before opening the air locks and meeting up facetoface.
"""
sentence_b = "charles simonyi is a russian cosmonaut."

test_sentence_a = "an american astronaut and u.s. billionaire tourist charles simonyi"
test_sentence_b = 'charles simonyi is a russian cosmonaut'

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.024615
SingleTask: 0.015987
MultiTask: 0.025282


In [34]:
sentence_a = """bout who is generally believed to be a model for the arms dealer portrayed by nicolas cage in the 2005 movie 'lord of war'  has repeatedly denied any involvement in illicit activities. at a hearing earlier this month, he angrily accused the united states of framing him and pressuring thailand to extradite him. he has long been linked to some of the world's most notorious conflicts, allegedly supplying arms to former liberian dictator charles taylor and libyan leader colonel gaddafi.
"""
sentence_b = "gaddafi is the liberian dictator."

test_sentence_a = "former liberian dictator charles taylor"
test_sentence_b = 'gaddafi is the liberian dictator'

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.011633
SingleTask: 0.012457
MultiTask: 0.019987


In [35]:
sentence_a = """a man has hijacked a passenger plane in the jamaican resort of montego bay, and is still holding five crew members hostage, reports say. more than 150 passengers were on board when the man forced his way on board the canadabound charter plane but all have now been released. the man breached security and boarded as the plane was about to take off at around 2230 local time 0330 gmt. negotiations for the release of the hostages are said to be taking place."""
sentence_b = "a plane crashed in the jamaican resort of montego bay."

test_sentence_a = "a man has hijacked a passenger plane in the jamaican resort of montego bay"
test_sentence_b = 'a plane crashed in the jamaican resort of montego bay'

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.022674
SingleTask: 0.022773
MultiTask: 0.024005


In [36]:
sentence_a = """the hurricane caused severe destruction across the entire mississippi coast and into alabama, as far as 100 miles 160 km from the storm's center. katrina was the eleventh tropical storm, fifth hurricane, third major hurricane, and second category 5 hurricane of the 2005 atlantic season. it formed over the bahamas on august 23, 2005, and crossed southern florida as a moderate category 1 hurricane, causing some deaths and flooding there, before strengthening rapidly in the gulf of mexico and becoming one of the strongest hurricanes on record while at sea.
"""
sentence_b = "hurricane katrina formed in august 2005 in the gulf of mexico."

test_sentence_a = "it formed over the bahamas on august 23, 2005"
test_sentence_b = 'hurricane katrina formed in august 2005 in the gulf of mexico.'

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.017245
SingleTask: 0.015898
MultiTask: 0.017563


In [37]:
sentence_a = """north korea's rubberstamp parliament has reelected kim jongil as chairman of the national defence commission, the country's most powerful position. mr kim's reelection comes days after a rocket launch that was lauded at home but criticised at the un. analysts say the move will help cement mr kim's position, after rumours he lost some of his grip on power after suffering a stroke in august. observers have expressed concern about the apparent lack of a succession plan.
"""
sentence_b = "kim jongil is the chairman of the un."

test_sentence_a = "kim jongil as chairman of the national defence commission"
test_sentence_b = 'kim jongil is the chairman of the un.'

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.031021
SingleTask: 0.027749
MultiTask: 0.033714


In [40]:
sentence_a = """leftist mauricio funes of el salvador's former marxist rebel fmln party has won the country's presidential election. he defeated his conservative rival, the arena party's rodrigo avila, who has admitted defeat. arena had won every presidential election since the end of el salvador's civil war 18 years ago. addressing jubilant supporters, mr funes said it was the happiest day of his life and the beginning of a new chapter of peace for the country. branded by his opponents as a puppet of venezuala's president hugo chavez, mr funes vowed to respect all salvadorian democratic institutions.
"""
sentence_b = "rodrigo avila has won el salvador's presidential election."

test_sentence_a = """leftist mauricio funes of el salvador's former marxist rebel fmln party has won the country's presidential election"""
test_sentence_b = "rodrigo avila has won el salvador's presidential election"

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.015666
SingleTask: 0.016491
MultiTask: 0.013709


# Entailment

In [41]:
sentence_a = """us music producer phil spector has been convicted of murdering actress lana clarkson, after a fivemonth retrial. the 68yearold, famous for the "wall of sound" recording technique, faces between 15 years and life in prison. he had pleaded not guilty to the second degree murder of 40yearold ms clarkson, who was shot in the mouth at spector's home in los angeles. spector was remanded in custody until sentencing on 29 may. his lawyer has said he intends to appeal.
"""
sentence_b = "phil spector was a music producer."

test_sentence_a = "us music producer phil spector"
test_sentence_b = "phil spector was a music producer"

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.115151
SingleTask: 0.062170
MultiTask: 0.113543


In [42]:
sentence_a = """crippa died a week ago at santa barbara cottage hospital, seven days after he ate a heaping plate of the deadly amanita ocreata mushrooms, said his wife, joan crippa. known as "death angel" for its snowwhite appearance, the fungus has deadly toxins that worked their way through crippa's system, sickening him and eventually causing his liver to fail. family members had often warned crippa against indulging in his passion for hunting wild mushrooms, an activity he learned from his italian immigrant parents, his wife said.
"""
sentence_b = "crippa was killed by a wild mushroom."

test_sentence_a = "crippa died a week ago at santa barbara cottage hospital, seven days after he ate a heaping plate of the deadly amanita ocreata mushrooms"
test_sentence_b = "crippa was killed by a wild mushroom"

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.015529
SingleTask: 0.006583
MultiTask: 0.016079


In [43]:
sentence_a = """one man, who did not want to be named, said: "there was a big fight  i heard it was some sort of retaliation after something else that happened earlier on." most of larkhall park has been sealed off following the incident. a 100metre stretch of wandsworth road, which runs along the western side of the park, has also been closed to traffic. the teenager's death brings the total number of young people to die in violent circumstances in london this year, to seven.
"""
sentence_b = "seven young people died in violent situations this year in london."

test_sentence_a = "the teenager's death brings the total number of young people to die in violent circumstances in london this year, to seven"
test_sentence_b = "seven young people died in violent situations this year in london"

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.017591
SingleTask: 0.016502
MultiTask: 0.023821


In [44]:
sentence_a = """a ugandan spy who set up a bogus charity and embezzled thousands of dollars of funding meant for aids patients has been jailed for 10 years. teddy sseezi cheeye, 51, took $56,000 38,000 from the global fund charity, which aims to prevent hiv, tuberculosis and malaria. he set up an ngo, the uganda centre for accountability, which received cash in 2005 to do hiv/aids community work. but the high court in kampala heard cheeye siphoned off the funds instead.
"""
sentence_b = "teddy sseezi cheeye is an ugandan spy."

test_sentence_a = "a ugandan spy who set up a bogus charity and embezzled thousands of dollars of funding meant for aids patients has been jailed for 10 years. teddy sseezi cheeye"
test_sentence_b = "teddy sseezi cheeye is an ugandan spy"

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.027014
SingleTask: 0.028418
MultiTask: 0.029012


In [45]:
sentence_a = """a japanese pop singer who was arrested for public indecency after being found drunk and naked in a tokyo park has apologised for his "shameful" conduct. "i drank a lot and did not know what i was doing," tsuyoshi kusanagi said on friday after being released by police. "i deeply apologise to fans for causing so much trouble and worry," he added. kusanagi, 34, shot to fame as a member of 1980s boy band smap. he has not been charged for the incident, which took place in the early hours of thursday.
"""
sentence_b = "tsuyoshi kusanagi is a pop star."

test_sentence_a = """a japanese pop singer who was arrested for public indecency after being found drunk and naked in a tokyo park has apologised for his "shameful" conduct. "i drank a lot and did not know what i was doing," tsuyoshi kusanagi said on friday after being released by police"""
test_sentence_b = "tsuyoshi kusanagi is a pop star"

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.012499
SingleTask: 0.012254
MultiTask: 0.022521


# Key span

In [46]:
sentence_a = """a ugandan spy who set up a bogus charity and embezzled thousands of dollars of funding meant for aids patients has been jailed for 10 years. teddy sseezi cheeye, 51, took $56,000 38,000 from the global fund charity, which aims to prevent hiv, tuberculosis and malaria. he set up an ngo, the uganda centre for accountability, which received cash in 2005 to do hiv/aids community work. but the high court in kampala heard cheeye siphoned off the funds instead.
"""
sentence_b = "teddy sseezi cheeye is an ugandan spy."

test_sentence_a = "a ugandan spy teddy sseezi cheeye"
test_sentence_b = "teddy sseezi cheeye is an ugandan spy"

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.056409
SingleTask: 0.057168
MultiTask: 0.059053


In [47]:
sentence_a = """a japanese pop singer who was arrested for public indecency after being found drunk and naked in a tokyo park has apologised for his "shameful" conduct. "i drank a lot and did not know what i was doing," tsuyoshi kusanagi said on friday after being released by police. "i deeply apologise to fans for causing so much trouble and worry," he added. kusanagi, 34, shot to fame as a member of 1980s boy band smap. he has not been charged for the incident, which took place in the early hours of thursday.
"""
sentence_b = "tsuyoshi kusanagi is a pop star."

test_sentence_a = """a japanese pop singer tsuyoshi kusanagi"""
test_sentence_b = "tsuyoshi kusanagi is a pop star"

model_pretrained.to(device)
X_pretrained = explainability_compare(model_pretrained, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_single = explainability_compare(model_single, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)
X_multi = explainability_compare(model_multi, tokenizer, sentence_a, sentence_b, test_sentence_a, test_sentence_b)

print("Pretrained: %6f\nSingleTask: %6f\nMultiTask: %6f" % (X_pretrained, X_single, X_multi))

Pretrained: 0.037847
SingleTask: 0.036912
MultiTask: 0.062019
