In [1]:
!pip3 install captum
!pip3 install transquest

Collecting captum
[?25l  Downloading https://files.pythonhosted.org/packages/cc/da/50dd447964766b92a0d1e3781559401b5c58d3b524b8dbb8fab75dc98070/captum-0.4.0-py3-none-any.whl (1.4MB)
[K     |▎                               | 10kB 18.1MB/s eta 0:00:01[K     |▌                               | 20kB 24.3MB/s eta 0:00:01[K     |▊                               | 30kB 27.9MB/s eta 0:00:01[K     |█                               | 40kB 29.4MB/s eta 0:00:01[K     |█▏                              | 51kB 30.9MB/s eta 0:00:01[K     |█▍                              | 61kB 31.1MB/s eta 0:00:01[K     |█▋                              | 71kB 26.8MB/s eta 0:00:01[K     |██                              | 81kB 27.3MB/s eta 0:00:01[K     |██▏                             | 92kB 27.8MB/s eta 0:00:01[K     |██▍                             | 102kB 28.4MB/s eta 0:00:01[K     |██▋                             | 112kB 28.4MB/s eta 0:00:01[K     |██▉                             | 122kB 28.4MB

In [2]:
import numpy as np

import torch
import torch.nn as nn

from captum.attr import IntegratedGradients
from tqdm import tqdm

In [3]:
from captum.attr import visualization as viz
import json

In [4]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [5]:
from transquest.algo.sentence_level.monotransquest.run_model import MonoTransQuestModel

RESULTS_FNAME = 'results.json'
SRC_LANG = 'et'
TGT_LANG = 'en'
SPLIT = 'dev'
#data_dir = f'/content/drive/MyDrive/Meta/data/{SPLIT}/{SRC_LANG}-{TGT_LANG}-{SPLIT}'
data_dir = f'/content/drive/MyDrive/data/{SPLIT}/{SRC_LANG}-{TGT_LANG}-{SPLIT}'
src = [s.strip() for s in open(f'{data_dir}/{SPLIT}.src').readlines()]
tgt = [s.strip() for s in open(f'{data_dir}/{SPLIT}.mt').readlines()]
wor = [list(map(int, s.strip().split())) for s in open(f'{data_dir}/{SPLIT}.tgt-tags').readlines()]
sen = [float(s.strip()) for s in open(f'{data_dir}/{SPLIT}.da').readlines()]
assert len(src) == len(tgt) == len(wor) == len(sen)
dataset = {'src': src, 'tgt': tgt, 'word_labels': wor, 'sent_labels': sen}

# Load model
tr_model = MonoTransQuestModel(
    'xlmroberta',
    f'TransQuest/monotransquest-da-{SRC_LANG}_{TGT_LANG}-wiki', num_labels=1, use_cuda=torch.cuda.is_available()
)
# For De-Zh and Ru-De We used the multilingual model
# tr_model = MonoTransQuestModel(
#    'xlmroberta',
#    f'TransQuest/monotransquest-da-multilingual', num_labels=1, use_cuda=torch.cuda.is_available()
#)


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
tr_model.model.to(device)



Downloading:   0%|          | 0.00/721 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/236 [00:00<?, ?B/s]

  f"use_multiprocessing automatically disabled as {model_type}"


XLMRobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(250002, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
     

In [6]:
from transquest.algo.sentence_level.monotransquest.utils import LazyClassificationDataset, InputExample, \
        convert_examples_to_features

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset

# Evaluation Functions
We copied them into our notebooks for easier access in Google Colab noteboos

In [7]:

from sklearn.metrics import roc_auc_score, average_precision_score
from scipy.stats import pearsonr

def read_sentence_data(gold_sent_fh, model_sent_fh):
    gold_scores = [float(line.strip()) for line in gold_sent_fh]
    model_scores = [float(line.strip()) for line in model_sent_fh]
    assert len(gold_scores) == len(model_scores)
    return gold_scores, model_scores


def read_word_data(gold_explanations_fh, model_explanations_fh):
    gold_explanations = [list(map(int, line.split())) for line in gold_explanations_fh]
    model_explanations = [list(map(float, line.split())) for line in model_explanations_fh]
    assert len(gold_explanations) == len(model_explanations)
    for i in range(len(gold_explanations)):
        assert len(gold_explanations[i]) == len(model_explanations[i])
        assert len(gold_explanations[i]) > 0
    return gold_explanations, model_explanations


def validate_word_level_data(gold_explanations, model_explanations):
    valid_gold, valid_model = [], []
    for gold_expl, model_expl in zip(gold_explanations, model_explanations):
        if sum(gold_expl) == 0 or sum(gold_expl) == len(gold_expl):
            continue
        else:
            valid_gold.append(gold_expl)
            valid_model.append(model_expl)
    return valid_gold, valid_model


def compute_auc_score(gold_explanations, model_explanations):
    res = 0
    for i in range(len(gold_explanations)):
        res += roc_auc_score(gold_explanations[i], model_explanations[i])
    return res / len(gold_explanations)


def compute_ap_score(gold_explanations, model_explanations):
    res = 0
    for i in range(len(gold_explanations)):
        res += average_precision_score(gold_explanations[i], model_explanations[i])
    return res / len(gold_explanations)


def compute_rec_topk(gold_explanations, model_explanations):
    res = 0
    for i in range(len(gold_explanations)):
        idxs = np.argsort(model_explanations[i])[::-1][:sum(gold_explanations[i])]
        res += len([idx for idx in idxs if gold_explanations[i][idx] == 1])/sum(gold_explanations[i])
    return res / len(gold_explanations)


def evaluate_word_level(gold_explanations, model_explanations):
    gold_explanations, model_explanations = validate_word_level_data(gold_explanations, model_explanations)
    auc_score = compute_auc_score(gold_explanations, model_explanations)
    ap_score = compute_ap_score(gold_explanations, model_explanations)
    rec_topk = compute_rec_topk(gold_explanations, model_explanations)
    print('AUC score: {:.3f}'.format(auc_score))
    print('AP score: {:.3f}'.format(ap_score))
    print('Recall at top-K: {:.3f}'.format(rec_topk))
    return auc_score, ap_score, rec_topk


def evaluate_sentence_level(gold_scores, model_scores):
    corr = pearsonr(gold_scores, model_scores)[0]
    print('Pearson correlation: {:.3f}'.format(corr))


# Captum Functions

In [8]:
def create_input(to_predict, tr_model):
    dummy_label = 0
    if isinstance(to_predict[0], list):
        eval_examples = [
            InputExample(i, text[0], text[1], dummy_label) for i,text in enumerate(to_predict)]
    else:
        eval_examples = [InputExample(i, text, None, dummy_label) for i, text in enumerate(to_predict)]
    eval_dataset = tr_model.load_and_cache_examples(
                eval_examples, evaluate=True, multi_label=False,                        no_cache=True)

    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler,      batch_size=tr_model.args.eval_batch_size)
    for batch in eval_dataloader:
        inputs = tr_model._get_inputs_dict(batch)
    return inputs

In [9]:

def forward(inputs, attention_mask, labels,token_type_ids):
    prediction = tr_model.model(input_ids=inputs,attention_mask=attention_mask,token_type_ids=token_type_ids,labels=labels)
    return prediction[1]

In [10]:
class CaptumModel(torch.nn.Module):
    def __init__(self, model):
        super(CaptumModel, self).__init__()
        self.model = model

    def forward(self,inputs, attention_mask, labels,token_type_ids):
        return self.model(input_ids=inputs,attention_mask=attention_mask,token_type_ids=token_type_ids,labels=labels)[1]

In [11]:
def find_tgt_start_index(tokens):
  i= 1
  while i < len(tokens):
    if tokens[i]== "</s>":
      return i + 2
    i+= 1

def get_tgt_tokens(start_index, tokens):
  return(tokens[start_index: -1])

#This function combines the individual WordPiece scores into word-level scores
def get_attr(all_tokens, attributions, prediction, v=False, show=True, show_gt=False):
  
  start_index = find_tgt_start_index(all_tokens)
  tgt_tokens = get_tgt_tokens(start_index, all_tokens)
  tgt_attribution = get_tgt_tokens(start_index, attributions)

  i = 0
  scores = []
  min_scores = []
  max_scores = []
  absmin_scores = []
  absmax_scores = []
  mean_scores = []
  sum_scores = []
  tokens = []
  while i < len(tgt_tokens):
    if(tgt_tokens[i] in [] ):
      i+=1
      continue
    #print(f"First {tgt_tokens[i]}")
    local_scores = []
    local_tokens = ""
    local_tokens += tgt_tokens[i][1:]
    local_scores.append(tgt_attribution[i])
    j=1
    if not i + j == len(tgt_tokens):
      while(not tgt_tokens[i + j][0] == "▁"):
        #print(f"not {tgt_tokens[i + j]}")
        local_tokens += tgt_tokens[i + j]
        local_scores.append(tgt_attribution[i + j])
        j += 1
        if i + j == len(tgt_tokens):
          break
    tokens.append(local_tokens)
    min_scores.append(np.min(local_scores))
    max_scores.append(np.max(local_scores))
    absmin_scores.append(local_scores[np.argmin(np.abs(local_scores))])
    absmax_scores.append(local_scores[np.argmax(np.abs(local_scores))])
    mean_scores.append(np.mean(local_scores))
    sum_scores.append(np.sum(local_scores))

    i = i + j

  return prediction, min_scores, max_scores, absmin_scores, absmax_scores, mean_scores,sum_scores

def explain(to_predict,explainer,v=False,occlusion=False,integratedgradients=False):
  input_dict = create_input(to_predict, tr_model)
  prediction =  forward(input_dict["input_ids"],input_dict["attention_mask"],input_dict["labels"],input_dict["token_type_ids"]).cpu().detach().numpy()[0][0]
  tr_model.model.eval()
  tr_model.model.zero_grad()
  if occlusion :
    attributions = explainer.attribute(input_dict["input_ids"],additional_forward_args=(input_dict["attention_mask"],input_dict["labels"],input_dict["token_type_ids"]),sliding_window_shapes=(1,))
    attributions = attributions[0]
  elif integratedgradients:
    attributions = explainer.attribute(input_dict["input_ids"],additional_forward_args=(input_dict["attention_mask"],input_dict["labels"],input_dict["token_type_ids"]),internal_batch_size=20)
    attributions = summarize_attributions(attributions)
  else: 
    attributions = explainer.attribute(input_dict["input_ids"],additional_forward_args=(input_dict["attention_mask"],input_dict["labels"],input_dict["token_type_ids"]))
    attributions = summarize_attributions(attributions)
  
  attributions = attributions.cpu().detach().numpy()
  tokenizer = tr_model.tokenizer
  indices = input_dict["input_ids"][0].detach().tolist()
  all_tokens = tokenizer.convert_ids_to_tokens(indices)

  return get_attr(all_tokens, attributions,prediction, v=v)

def summarize_attributions(attributions):
    attributions = attributions.sum(dim=-1).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    return attributions

In [12]:
def explain_dataset(explainer, fname,occlusion=False,integratedgradients=False):
  results = []
  
  for idx in tqdm(range(len(dataset['src']))):
    to_predict = [[src[idx],tgt[idx]]]
    start = time.time()
    pred_score, min, max, absmin, absmax, mean, sum = explain(to_predict,explainer,occlusion=occlusion,integratedgradients=integratedgradients)
    runtime = time.time() - start
    feature_maps = np.zeros(len(min))
    feature_maps_min =  np.zeros(len(min))
    feature_maps_max =  np.zeros(len(min))
    feature_maps_absmin = np.zeros(len(min))
    feature_maps_absmax =  np.zeros(len(min))
    feature_maps_mean =  np.zeros(len(min))
    feature_maps_sum =  np.zeros(len(min))
    for i,v in enumerate(min):
        feature_maps_min[i] = v*-1
        feature_maps_max[i] = max[i]*-1
        feature_maps_absmin[i] = absmin[i]*-1
        feature_maps_absmax[i] = absmax[i]*-1
        feature_maps_mean[i] = mean[i]*-1
        feature_maps_sum[i] = sum[i]*-1      
    results.append(
        {
            'pred': float(pred_score),
            'expl_min': list(feature_maps_min),
            'expl_max': list(feature_maps_max),
            'expl_absmin': list(feature_maps_absmin),
            'expl_absmax': list(feature_maps_absmax),
            'expl_mean': list(feature_maps_mean),
            'expl_sum': list(feature_maps_sum),
            'ground_truth_word': dataset['word_labels'][idx],
            'ground_truth_sent': dataset['sent_labels'][idx],
            "time" : runtime
        }
    )
  json.dump(results, open(fname, "w"))
  return results

In [13]:
from captum.attr import LayerConductance, LayerIntegratedGradients,LayerDeepLift,LayerGradientXActivation,Occlusion
captum_model = CaptumModel(tr_model.model)
explainer_integratedgradients = LayerIntegratedGradients(forward, tr_model.model.roberta.embeddings,device_ids=[0])
explainer_deeplift = LayerDeepLift(captum_model,tr_model.model.roberta.embeddings)
explainer_layerGradientXActivation = LayerGradientXActivation(captum_model,tr_model.model.roberta.embeddings,device_ids=[0])
explainer_occlusion = Occlusion(forward)

In [15]:
explainers = {
  "deeplift" : explainer_deeplift,
  "layerGradientXActivation" : explainer_layerGradientXActivation,
  "occlusion" : explainer_occlusion,
  "integratedgradients" : explainer_integratedgradients
 }

In [16]:
results = {}
import time
from tqdm import tqdm
from functools import partialmethod

tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)

In [None]:
np.random.seed(42)

for name, explainer in explainers.items():
  print(f"Running {name}")
  RESULTS_FNAME = "results" +name +".json"
  results[name] = explain_dataset(explainer, RESULTS_FNAME,occlusion=name == "occlusion",integratedgradients=name=="integratedgradients")

In [18]:
def evaluate_json():
  evaluations = {}
  for name, explaineru in explainers.items():
    fname = "results" + name + ".json"
    try:
      result = json.load(open(fname))
      gold_expls = [item['ground_truth_word'] for item in result]
      model_expls_min = [item['expl_min'] for item in result]
      model_expls_max = [item['expl_max'] for item in result]
      model_expls_absmin =  [item['expl_absmin'] for item in result]
      model_expls_absmax = [item['expl_absmax'] for item in result]
      model_expls_mean =  [item['expl_mean'] for item in result]
      model_expls_sum = [item['expl_sum'] for item in result]
      evaluations[name] = {}
      for explname, expl in zip(["min","max", "absmin", "absmax", "mean", "sum"], [model_expls_min,model_expls_max,model_expls_absmin,model_expls_absmax,model_expls_mean,model_expls_sum]):
        abs_expls = [np.abs(item) for item in expl]
        inverted_expls = [np.array(item)*-1 for item in expl]
        evaluations[name][explname] = {
        "classic" :  evaluate_word_level(gold_expls, expl),
        "abolute": evaluate_word_level(gold_expls, abs_expls),
        "inverted" : evaluate_word_level(gold_expls, inverted_expls)
      }
    except:
      pass
  json.dump(evaluations, open("evaluations.json","w"))
  return evaluations


In [21]:
evaluate_json()

AUC score: 0.545
AP score: 0.459
Recall at top-K: 0.353
AUC score: 0.607
AP score: 0.494
Recall at top-K: 0.390
AUC score: 0.455
AP score: 0.403
Recall at top-K: 0.308
AUC score: 0.476
AP score: 0.405
Recall at top-K: 0.298
AUC score: 0.606
AP score: 0.491
Recall at top-K: 0.389
AUC score: 0.524
AP score: 0.455
Recall at top-K: 0.355
AUC score: 0.511
AP score: 0.425
Recall at top-K: 0.317
AUC score: 0.584
AP score: 0.466
Recall at top-K: 0.366
AUC score: 0.489
AP score: 0.419
Recall at top-K: 0.322
AUC score: 0.510
AP score: 0.440
Recall at top-K: 0.338
AUC score: 0.621
AP score: 0.511
Recall at top-K: 0.404
AUC score: 0.490
AP score: 0.437
Recall at top-K: 0.339
AUC score: 0.511
AP score: 0.428
Recall at top-K: 0.322
AUC score: 0.596
AP score: 0.482
Recall at top-K: 0.383
AUC score: 0.489
AP score: 0.426
Recall at top-K: 0.325
AUC score: 0.508
AP score: 0.435
Recall at top-K: 0.336
AUC score: 0.608
AP score: 0.501
Recall at top-K: 0.397
AUC score: 0.492
AP score: 0.434
Recall at top-K

{'deeplift': {'absmax': {'abolute': (0.6213948361695039,
    0.5113153287879667,
    0.4036886034781681),
   'classic': (0.510035646044825, 0.43972275115335063, 0.33813271980506227),
   'inverted': (0.48996435395517507,
    0.43663023069952006,
    0.33935197257655136)},
  'absmin': {'abolute': (0.5838925819583549,
    0.4662911328502683,
    0.3661412616345154),
   'classic': (0.511256481451313, 0.4246093650995799, 0.3172814187852272),
   'inverted': (0.488743518548687, 0.41914446448196907, 0.3218052674686342)},
  'max': {'abolute': (0.6063166294257729,
    0.49118541317658154,
    0.38874693886857037),
   'classic': (0.47619747521310457, 0.40507683768662206, 0.29779664930203786),
   'inverted': (0.5238025247868956, 0.45518829922012116, 0.3553806184068217)},
  'mean': {'abolute': (0.5963725595794745,
    0.4819334421009182,
    0.3828420087659667),
   'classic': (0.511091495833906, 0.42811488820141796, 0.32152143533007016),
   'inverted': (0.488908504166094, 0.42565684486695116, 0.325