In [None]:
import argparse
import torch
import pandas as pd
from transformers import AutoTokenizer, DataCollatorWithPadding
from transformers import AutoConfig, AutoModelForSequenceClassification
from transformers import BertConfig, BertForSequenceClassification, AdamW, get_scheduler
# from datasets import load_metric
from transformers import TrainingArguments
from transformers import Trainer
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from sklearn.datasets import load_svmlight_file

In [None]:
# Load the train / dev / test splits, in that order.
# NOTE(review): these files are named "oversample" while the checkpoint and the
# saved attributions elsewhere in this notebook are named "undersampled" - confirm
# the intended data split.
df1, df2, df3 = (
    pd.read_csv(csv_path)
    for csv_path in ("train_oversample.csv", "dev_oversample.csv", "test_oversample.csv")
)

In [None]:
from transformers import BertTokenizer, BertModel

# Tokenizer matching the multilingual BERT checkpoint used throughout the notebook.
# The bare `BertModel` that used to be loaded here was dropped: `model` is rebound to
# a BertForSequenceClassification in a later cell before anything reads it, so the
# extra load only cost download time and memory.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')

In [None]:
# Raw `src_mt` column of each split as a NumPy array.
sentences_train = df1["src_mt"].values
sentences_dev = df2["src_mt"].values
sentences_test = df3["src_mt"].values

In [None]:
# `hallucinate` column of each split as a NumPy array.
labels_train = df1["hallucinate"].values
labels_dev = df2["hallucinate"].values
labels_test = df3["hallucinate"].values

In [None]:
from transformers import BertForSequenceClassification, AdamW, BertConfig

# Pretrained multilingual BERT with a sequence-classification head on top.
# Two output labels; hidden states are returned, attention maps are not.
classifier_options = dict(
    num_labels=2,
    output_attentions=False,
    output_hidden_states=True,
)
model = BertForSequenceClassification.from_pretrained(
    "bert-base-multilingual-uncased",
    **classifier_options,
)

In [None]:
# Resize the model's token-embedding matrix to the tokenizer's vocabulary length.
model.resize_token_embeddings(len(tokenizer))

In [None]:
# load the model

from collections import OrderedDict

# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints from a trusted source. The absolute path is machine-specific.
checkpoint = torch.load("/data/mounted/angana/bert_train_undersampled_srcmt_3.pth", map_location='cpu')

state_dict = checkpoint['model_state_dict']

# Checkpoints saved from a DataParallel-wrapped model prefix every key with
# 'module.'. Strip the prefix only where it is actually present, so a checkpoint
# saved without the wrapper is not corrupted by blindly dropping 7 characters.
dataparallel_prefix = 'module.'
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k[len(dataparallel_prefix):] if k.startswith(dataparallel_prefix) else k
    new_state_dict[name] = v

model.load_state_dict(new_state_dict)

In [None]:
# Sanity check: display the class of the object currently bound to `model`.
type(model)

In [None]:
# Use the first CUDA GPU when one is available, otherwise the CPU.
# NOTE(review): no visible cell moves `model` or any input tensor to `device` -
# confirm whether this is still needed.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# AdamW optimizer over all model parameters.
# NOTE(review): no visible cell performs a training step, so this optimizer appears
# unused here. The `AdamW` exported by transformers has been deprecated in favour of
# torch.optim.AdamW - confirm the installed version still provides it.
optimizer = AdamW(model.parameters(),
                  lr = 4e-5, 
                  eps = 1e-8 
                )

In [None]:
from transformers import get_linear_schedule_with_warmup

# Number of passes over the training data used to size the schedule.
epochs = 4

# NOTE(review): `train_dataloader` is not defined in any visible cell, so this line
# raises NameError on a fresh kernel - confirm where it should come from, or drop
# the optimizer/scheduler cells if this notebook only computes attributions.
total_steps = len(train_dataloader) * epochs

# Linear learning-rate schedule with no warm-up steps over `total_steps` updates.
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0, # Default value in run_glue.py
                                            num_training_steps = total_steps)

### Tokenize example sentence 

In [None]:
# get tokenized BERT text inputs

In [None]:
# Preview the first rows of the training split.
df1.head()

In [None]:
# Show the `src_mt` entry stored under index label 0 of the training split.
df1.src_mt[0]

In [None]:
# Example sentence used to inspect the tokenizer output.
# NOTE(review): the original comment called this the "translated sentence", yet it is
# printed as "Original" and stored in `tokenized_original` - confirm which it is.

text = "Ich habe Verständnis dafür, dass ein breiter Konsens beim Thema Umwelthaftung nicht immer möglich ist. Gerade darum möchte ich aber heute auch ganz besonders für die Annahme des Berichts des Ausschusses für Recht und Binnenmarkt werben, denn der Rechtsausschuss hat eine klare, maßvolle und praktikable Stellungnahme abgegeben. Der Bericht stellt einen großen Schritt in Richtung eines verbesserten Umweltschutzes dar und sollte deshalb auch im Plenum morgen bei der Abstimmung nachdrücklich unterstützt werden.	"

# Run the example through the model once (batch of one, PyTorch tensors).
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

# Tokenize a single time and reuse the result; the original cell tokenized the same
# text three times.
tokenized_original = tokenizer.tokenize(text)

print(' Original: ', text)

# Print the sentence split into tokens.
print('Tokenized: ', tokenized_original)
# Print the sentence mapped to token ids.
print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenized_original))

In [None]:
# Space-join the word pieces of the example sentence and wrap them in the
# [CLS] / [SEP] markers.
original = "[CLS] " + ' '.join(map(str, tokenized_original)) + " [SEP]"

In [None]:
# Same wrapping as for the original sentence, applied to the translated tokens.
# NOTE(review): `tokenized_translated` is not defined in any visible cell, so this
# raises NameError on a fresh kernel - confirm where it should come from.
translated = "[CLS] " + ' '.join(map(str, tokenized_translated)) + " [SEP]"

In [None]:
# Display the weight parameter of the BERT word-embedding layer.
model.bert.embeddings.word_embeddings.weight

### Integrated gradients

In [None]:
import captum

from captum.attr import visualization as viz
from captum.attr import LayerConductance, LayerIntegratedGradients

In [None]:
import spacy

In [None]:
from captum.attr import IntegratedGradients
from captum.attr import InterpretableEmbeddingBase, TokenReferenceBase
from captum.attr import visualization
from captum.attr import configure_interpretable_embedding_layer, remove_interpretable_embedding_layer

In [None]:
def squad_pos_forward_func(inputs, token_type_ids=None, position_ids=None, attention_mask=None, position=0):
    """Forward function handed to Captum: one scalar logit per example.

    Args:
        inputs: Token-id tensor forwarded to `predict`.
        token_type_ids: Optional segment ids forwarded to `predict`.
        position_ids: Optional position ids forwarded to `predict`.
        attention_mask: Optional attention mask forwarded to `predict`.
        position: Index of the output logit (class) to attribute.

    Returns:
        The logit at column `position` for every example in the batch.
    """
    pred = predict(inputs,
                   token_type_ids=token_type_ids,
                   position_ids=position_ids,
                   attention_mask=attention_mask)
    # `predict` returns the classifier logits, laid out as (batch, num_labels).
    # The previous `pred[position]` indexed the batch axis instead of the label
    # axis, returning both logits of a single example. Captum expands the batch
    # to one row per integration step, so gradients were then taken at one step
    # only. Selecting the column yields the per-example scalar Captum expects.
    return pred[:, position]

In [None]:
ref_token_id = tokenizer.pad_token_id # Padding token id; fills the text positions of the reference (baseline) sequence
sep_token_id = tokenizer.sep_token_id # Separator token id; appended to the end of every input and reference sequence
cls_token_id = tokenizer.cls_token_id # Classification token id; prepended to every input and reference sequence

In [None]:
def construct_input_ref_pair(text, ref_token_id, sep_token_id, cls_token_id):
    """Encode `text` and build a reference sequence of the same length.

    The text is encoded with the notebook-level `tokenizer` without special
    tokens; the given CLS / SEP ids are then added by hand. The reference keeps
    CLS and SEP but replaces every text token with `ref_token_id`.

    Returns:
        (input ids tensor with a leading batch axis of 1,
         reference ids tensor of the same shape,
         number of text tokens, excluding CLS and SEP).
    """
    text_ids = tokenizer.encode(text, add_special_tokens=False)
    n_text_tokens = len(text_ids)

    input_ids = [cls_token_id, *text_ids, sep_token_id]
    ref_input_ids = [cls_token_id, *([ref_token_id] * n_text_tokens), sep_token_id]

    return torch.tensor([input_ids]), torch.tensor([ref_input_ids]), n_text_tokens

def construct_input_ref_token_type_pair(input_ids, sep_ind=0):
    """Build segment ids for `input_ids` plus an all-zero reference.

    Positions up to and including `sep_ind` get segment 0, later positions get
    segment 1. The reference has the same shape and dtype, filled with zeros.

    Returns:
        (token_type_ids, ref_token_type_ids), each with a leading batch axis of 1.
    """
    segment_row = [int(pos > sep_ind) for pos in range(input_ids.size(1))]
    token_type_ids = torch.tensor([segment_row])
    ref_token_type_ids = torch.zeros_like(token_type_ids)
    return token_type_ids, ref_token_type_ids

def construct_input_ref_pos_id_pair(input_ids):
    """Build position ids 0..seq_len-1 and an all-zero reference.

    Both results are expanded to the shape of `input_ids`.

    Returns:
        (position_ids, ref_position_ids), both of dtype long.
    """
    n_positions = input_ids.size(1)
    # A random permutation (`torch.randperm`) would be another possible reference.
    position_ids = torch.arange(n_positions, dtype=torch.long).unsqueeze(0).expand_as(input_ids)
    ref_position_ids = torch.zeros(n_positions, dtype=torch.long).unsqueeze(0).expand_as(input_ids)
    return position_ids, ref_position_ids
    
def construct_attention_mask(input_ids):
    """Return a mask of ones with the same shape and dtype as `input_ids`."""
    return torch.full_like(input_ids, 1)

In [None]:
def predict(inputs, token_type_ids=None, position_ids=None, attention_mask=None):
    """Run the notebook-level `model` on `inputs` and return its logits.

    The optional segment ids, position ids and attention mask are passed
    through to the model unchanged.
    """
    return model(
        inputs,
        token_type_ids=token_type_ids,
        position_ids=position_ids,
        attention_mask=attention_mask,
    ).logits

In [None]:
# Per-sentence attribution vectors and word-piece lists for the test split.
attributes_list = list()
tokenizer_list = list()

# The attribution object depends only on the forward function and the embedding
# layer, not on the sentence, so it is built once instead of once per iteration.
lig = LayerIntegratedGradients(squad_pos_forward_func, model.bert.embeddings)

for cnt, sentence_text in enumerate(sentences_test):
    # Input ids and a same-length reference: [CLS] + pad ids + [SEP].
    input_ids, ref_input_ids, sep_id = construct_input_ref_pair(sentence_text, ref_token_id, sep_token_id, cls_token_id)
    # NOTE(review): `sep_id` is the number of text tokens, so only the trailing
    # [SEP] receives segment id 1 - confirm this matches how the model was trained.
    token_type_ids, ref_token_type_ids = construct_input_ref_token_type_pair(input_ids, sep_id)
    position_ids, ref_position_ids = construct_input_ref_pos_id_pair(input_ids)
    attention_mask = construct_attention_mask(input_ids)

    # Tokens of the current sentence; a later plotting cell reads `all_tokens`.
    indices = input_ids[0].tolist()
    all_tokens = tokenizer.convert_ids_to_tokens(indices)

    # The trailing 0 is the `position` argument of squad_pos_forward_func.
    attributions, delta = lig.attribute(inputs=input_ids,
                                  baselines=ref_input_ids,
                                  additional_forward_args=(token_type_ids, position_ids, attention_mask, 0),
                                  return_convergence_delta=True)

    # Collapse the embedding axis to one score per token, then L2-normalise.
    attributions = attributions.sum(dim=-1).squeeze(0)
    attributions = attributions / torch.norm(attributions)

    attributes_list.append(attributions)
    tokenizer_list.append(tokenizer.tokenize(sentence_text))
    # Progress indicator: index of the sentence just processed.
    print(cnt)

In [None]:
# Persist the list of per-sentence attribution tensors to disk.
torch.save(attributes_list, "undersampled_srcmt_attributes.pt")

In [None]:
# Recompute raw (un-summarised) attributions for whatever inputs the previous
# cells left in `input_ids`, `ref_input_ids`, `token_type_ids`, `position_ids`
# and `attention_mask`. The trailing 0 in `additional_forward_args` is the
# `position` argument of squad_pos_forward_func.
lig = LayerIntegratedGradients(squad_pos_forward_func, model.bert.embeddings)

attributions, delta = lig.attribute(inputs=input_ids,
                                  baselines=ref_input_ids,
                                  additional_forward_args=(token_type_ids, position_ids, attention_mask, 0),
                                  return_convergence_delta=True)

In [None]:
def summarize_attributions(attributions):
    """Reduce attributions to one normalised score per position.

    Sums over the last axis, drops a leading axis of size 1, and divides the
    result by its norm.
    """
    per_position = attributions.sum(dim=-1).squeeze(0)
    return per_position / torch.norm(per_position)

In [None]:
# Per-token, normalised summary of the attributions currently held in `attributions`.
attributions_sum = summarize_attributions(attributions)

In [None]:
# For every test sentence, the positions of its (up to) ten highest attribution
# scores, ordered from lower to higher score.
top_attributions = [
    sorted(range(len(attributes_list[j])), key=attributes_list[j].__getitem__)[-10:]
    for j in range(len(sentences_test))
]

In [None]:
# Alias for the test sentences; later cells index into `sentence`.
sentence = sentences_test

In [None]:
text_ids = tokenizer.encode(sentence[0], add_special_tokens=False)
# construct input token ids
# Stored under its own name: `input_ids` already holds the tensor built by
# construct_input_ref_pair, which the layer-conductance cell reads again together
# with the matching `ref_input_ids`. Rebinding it here to a plain list for a
# different sentence would silently break that pairing.
first_input_ids = [cls_token_id] + text_ids + [sep_token_id]

In [None]:
# Word pieces of the first test sentence, wrapped in the [CLS] / [SEP] markers.
text_ids = ['[CLS]', *tokenizer.tokenize(sentence[0]), '[SEP]']

text_ids

In [None]:
# Flat, parallel lists: each top-attributed token and its attribution score.
list_token = []
attr_score = []

print(len(attributes_list[0]) - 1)

for x in range(len(sentences_test)):
    # Rebuild the token sequence in the layout the attributions were computed on.
    text_ids = ['[CLS]', *tokenizer.tokenize(sentence[x], add_special_tokens=False), '[SEP]']
    sentence_scores = attributes_list[x]
    for token_pos in top_attributions[x]:
        list_token.append(text_ids[token_pos])
        attr_score.append(sentence_scores[token_pos].numpy().tolist())

In [None]:
# Pair every collected token with its score and tabulate the pairs.
data_tuples = [(token, score) for token, score in zip(list_token, attr_score)]
df_ = pd.DataFrame(data_tuples, columns=['Token', 'Avg Attribution Score'])
df_.head(50)

In [None]:
# Mean attribution score per distinct token, sorted from highest to lowest.
# NOTE(review): the result is only displayed - nothing in this cell writes a .csv file.

(df_.groupby('Token', as_index=False)['Avg Attribution Score'].mean()).sort_values(by=['Avg Attribution Score'], ascending=False)

### Layer Understanding 

In [None]:
def squad_pos_forward_func2(input_emb, attention_mask=None, position=0):
    """Forward function that takes precomputed input embeddings.

    Calls the notebook-level `model` with `inputs_embeds`, picks element
    `position` of the model output, and returns its maximum along dim 1.
    """
    model_output = model(inputs_embeds=input_emb, attention_mask=attention_mask)
    selected = model_output[position]
    return torch.max(selected, dim=1).values

In [None]:
# One summarised attribution row per encoder layer (filled in the loop below).
layer_attrs_start = []
# layer_attrs_end = []

# The token that we would like to examine separately.
# NOTE(review): index 23 assumes the analysed sequence has at least 24 tokens - confirm.
token_to_explain = 23 # the index of the token that we would like to examine more thoroughly
# Per layer: the attribution values of `token_to_explain` across the last axis.
layer_attrs_start_dist = []
# layer_attrs_end_dist = []

# NOTE(review): `construct_whole_bert_embeddings` is not defined in any visible cell,
# so this raises NameError on a fresh kernel - confirm where it should come from.
# The inputs used here are whatever earlier cells left in these variables.
input_embeddings, ref_input_embeddings = construct_whole_bert_embeddings(input_ids, ref_input_ids, \
                                         token_type_ids=token_type_ids, ref_token_type_ids=ref_token_type_ids, \
                                         position_ids=position_ids, ref_position_ids=ref_position_ids)

# Layer conductance for each encoder layer in turn.
for i in range(model.config.num_hidden_layers):
    lc = LayerConductance(squad_pos_forward_func2, model.bert.encoder.layer[i])
    # The trailing 0 is the `position` argument of squad_pos_forward_func2.
    layer_attributions_start = lc.attribute(inputs=input_embeddings, baselines=ref_input_embeddings, additional_forward_args=(attention_mask, 0))
#     layer_attributions_end = lc.attribute(inputs=input_embeddings, baselines=ref_input_embeddings, additional_forward_args=(attention_mask, 1))
    layer_attrs_start.append(summarize_attributions(layer_attributions_start).cpu().detach().tolist())
#     layer_attrs_end.append(summarize_attributions(layer_attributions_end).cpu().detach().tolist())

    # storing attributions of the token id that we would like to examine in more detail in token_to_explain
    layer_attrs_start_dist.append(layer_attributions_start[0,token_to_explain,:].cpu().detach().tolist())
#     layer_attrs_end_dist.append(layer_attributions_end[0,token_to_explain,:].cpu().detach().tolist())

In [None]:
import matplotlib.pyplot as plt

In [None]:
import seaborn as sns
import numpy as np

In [None]:
# Sanity check: number of entries in `sentence`.
len(sentence)

In [None]:
# Heatmap of the summarised attributions: one row per layer, one column per token.
fig, ax = plt.subplots(figsize=(100,20))
xticklabels = all_tokens
# Label rows 1..N from the number of rows actually collected rather than a
# hard-coded 12, so the plot stays correct for models of a different depth.
yticklabels = list(range(1, len(layer_attrs_start) + 1))
# Draw on the axes created above instead of relying on the implicit current axes.
sns.heatmap(np.array(layer_attrs_start), xticklabels=xticklabels, yticklabels=yticklabels, linewidth=0.2, ax=ax)
ax.set_xlabel('Tokens')
ax.set_ylabel('Layers')
plt.show()

In [None]:
# One box per layer: distribution of the examined token's attribution values.
fig, ax = plt.subplots(figsize=(20,10))
sns.boxplot(data=layer_attrs_start_dist, ax=ax)
ax.set_xlabel('Layers')
ax.set_ylabel('Attribution')
plt.show()