# Interpretation of a RoBERTa-based sequence classifier (`Detector`) in Captum

In [2]:
from captum.attr import visualization as viz
from captum.attr import IntegratedGradients, LayerConductance, LayerIntegratedGradients
from captum.attr import configure_interpretable_embedding_layer, remove_interpretable_embedding_layer

from detector import Detector
import torch

In [3]:
# import sys
#
# print(sys.executable)
# model2 = BertForSequenceClassification.from_pretrained('../model/')


In [4]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [5]:

# Build the detector from the "./base_9.pt" file (presumably a saved checkpoint).
model = Detector("./base_9.pt")

# The detector exposes its own tokenizer; reuse it so token ids match the model.
tokenizer = model.tokenizer

# Prepare the wrapped network for attribution: move it to the chosen device,
# switch to evaluation mode, and clear any stored gradients.
model.model.to(device)
model.model.eval()
model.model.zero_grad()

Initializing Detector...


In [6]:
def predict(inputs):
    """Run the detector's wrapped network on ``inputs``.

    Returns the first element of the network's output. Elsewhere in this
    notebook that element is passed to ``torch.softmax(..., dim=1)``, i.e. it
    is treated as per-class scores with one row per example.
    """
    outputs = model.model(inputs)
    return outputs[0]

In [7]:
# Special-token ids taken from the detector's tokenizer.
cls_token_id = tokenizer.cls_token_id  # prepended to every encoded text
sep_token_id = tokenizer.sep_token_id  # appended after the last text token
ref_token_id = tokenizer.pad_token_id  # padding id; replaces each text token in the baseline sequence

In [8]:
def construct_input_ref_pair(text, ref_token_id, sep_token_id, cls_token_id):
    """Encode ``text`` and build the matching baseline (reference) sequence.

    The input sequence is ``[cls] + text tokens + [sep]``. The reference keeps
    the same ``cls``/``sep`` ends but replaces every text token with
    ``ref_token_id``, so both sequences have the same length.

    Returns:
        A tuple ``(input_ids, ref_input_ids, n_text_tokens)``: two tensors with
        a leading batch dimension of 1, created on the notebook-level
        ``device``, and the number of text tokens (special tokens excluded).
    """
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    n_text_tokens = len(token_ids)

    # Real tokens wrapped in the special tokens.
    with_specials = [cls_token_id, *token_ids, sep_token_id]
    # Same layout, but every text position holds the reference token.
    baseline = [cls_token_id, *([ref_token_id] * n_text_tokens), sep_token_id]

    input_tensor = torch.tensor([with_specials], device=device)
    ref_tensor = torch.tensor([baseline], device=device)
    return input_tensor, ref_tensor, n_text_tokens

def construct_input_ref_token_type_pair(input_ids, sep_ind=0):
    """Build token-type (segment) ids for ``input_ids`` and an all-zero reference.

    Positions ``0..sep_ind`` (inclusive) get type 0; every later position gets
    type 1. The reference tensor is all zeros.

    Both tensors are created on ``input_ids.device`` rather than on a
    notebook-level global, so they always live on the same device as the ids
    they describe and the function works on its own.

    Args:
        input_ids: tensor whose second dimension is the sequence length.
        sep_ind: last position (inclusive) that belongs to segment 0.

    Returns:
        ``(token_type_ids, ref_token_type_ids)``, each of shape
        ``(1, seq_len)`` and dtype ``torch.long``.
    """
    seq_len = input_ids.size(1)
    positions = torch.arange(seq_len, device=input_ids.device)
    # True (-> 1) strictly after the separator index, False (-> 0) up to it.
    token_type_ids = (positions > sep_ind).long().unsqueeze(0)
    ref_token_type_ids = torch.zeros_like(token_type_ids)
    return token_type_ids, ref_token_type_ids

def construct_input_ref_pos_id_pair(input_ids):
    """Build position ids for ``input_ids`` and an all-zero reference.

    Position ids count ``0..seq_len-1`` along the sequence dimension; the
    reference uses position 0 everywhere. Both are expanded to the shape of
    ``input_ids`` and created on ``input_ids.device`` (not on a notebook-level
    global), so they stay on the same device as the ids they describe.

    Args:
        input_ids: 2-D tensor of shape ``(batch, seq_len)``.

    Returns:
        ``(position_ids, ref_position_ids)``, each with the shape of
        ``input_ids`` and dtype ``torch.long``.
    """
    seq_length = input_ids.size(1)
    target_device = input_ids.device
    position_ids = torch.arange(seq_length, dtype=torch.long, device=target_device)
    # A random permutation (`torch.randperm(seq_length, device=target_device)`)
    # would be another possible reference.
    ref_position_ids = torch.zeros(seq_length, dtype=torch.long, device=target_device)

    # Add the batch dimension and broadcast (without copying) to the input shape.
    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    ref_position_ids = ref_position_ids.unsqueeze(0).expand_as(input_ids)
    return position_ids, ref_position_ids
    
def construct_attention_mask(input_ids):
    """Return a mask of ones with the same shape, dtype and device as ``input_ids``.

    Every position is marked as attended to; no padding is masked out.
    """
    all_attended = torch.full_like(input_ids, 1)
    return all_attended

def custom_forward(inputs):
    """Forward function handed to Captum: probability of class index 0.

    Applies softmax over the class dimension of ``predict(inputs)`` and
    returns the class-0 probability for every example in the batch as a 1-D
    tensor of shape ``(batch,)``.

    For a batch of one this is the same shape-``(1,)`` result as indexing the
    first row directly. Returning one value per row (instead of only the first
    row's value) keeps attributions correct when Captum evaluates several
    interpolation steps in a single forward call.
    """
    logits = predict(inputs)
    class_probs = torch.softmax(logits, dim=1)
    # One scalar per example: column 0 of the (batch, num_classes) probabilities.
    return class_probs[:, 0]

def summarize_attributions(attributions):
    """Collapse attributions to one normalized score per token.

    Sums over the last dimension, drops the leading dimension when it has
    size 1, and divides by the L2 norm of the result.

    If every summed attribution is exactly zero (for instance when the input
    equals the baseline), the zero vector is returned as-is instead of
    dividing by zero and producing NaNs.

    Args:
        attributions: attribution tensor; assumed to be
            ``(1, seq_len, embedding_dim)`` as returned for a single text —
            TODO confirm against the attribution call.

    Returns:
        Tensor of per-token scores with unit L2 norm (or all zeros).
    """
    token_scores = attributions.sum(dim=-1).squeeze(0)
    norm = torch.norm(token_scores)
    if norm == 0:
        # Nothing to scale; avoid 0/0 -> NaN.
        return token_scores
    return token_scores / norm

# Attribute the class-0 probability from `custom_forward` to the output of the
# network's embedding layer.
lig = LayerIntegratedGradients(
    forward_func=custom_forward,
    layer=model.model.roberta.embeddings,
)

In [13]:
# model.model(input_ids)

In [14]:
# predict(input_ids)

tensor([0.0032], device='cuda:0', grad_fn=<UnsqueezeBackward0>)

In [38]:
# Number of texts (taken from the front of the shuffled list) to visualize.
NUM_OF_TEXT = 10

# Integer codes for the kinds of dataset. FILE_TYPE is the code shown as the
# true label in the visualization.
# NOTE(review): FILE_TYPE is set by hand; presumably it should match DATA_FILE.
MUTATION, REAL, SYNTHETIC = range(3)
FILE_TYPE = SYNTHETIC

# Dataset to load. Alternatives are kept below; enable exactly one.
# DATA_FILE = './data/Test_WikiHumanQuarterSet.json'
# DATA_FILE = './data/Test_WikiMutationFullReplaceAntonyms.json'
# DATA_FILE = './data/Test_WikiMutationFullReplaceRandomWords.json'
# DATA_FILE = './data/Test_WikiMutationFullReplaceSynonyms.json'
# DATA_FILE = './data/Test_WikiMutationFullSet.json'
# DATA_FILE = './data/Test_WikiMutationFullSetDeleteArticles.json'
# DATA_FILE = './data/Test_WikiMutationFullSetMisspellings.json'
# DATA_FILE = './data/Test_WikiMutationFullSetReplaceAE.json'
# DATA_FILE = './data/Test_WikiMutationQuarterSet.json'
# DATA_FILE = './data/Test_WikiSyntheticFullSet.json'
DATA_FILE = './data/Test_WikiSyntheticQuarterSet.json'

In [39]:
from utils2 import load_standard_json
import random

# Fixed seed so that "Restart Kernel -> Run All" visualizes the same texts
# every time; change the value to draw a different sample.
SHUFFLE_SEED = 0

text_list = load_standard_json(DATA_FILE, True)
# Shuffle in place with a dedicated generator so the global `random` state is
# left untouched.
random.Random(SHUFFLE_SEED).shuffle(text_list)

In [40]:
# Colour legend for Captum's text visualization: green marks tokens with a
# positive attribution towards the attributed output (the class-0 probability
# from `custom_forward`), red marks tokens with a negative attribution.
print('\033[1m', 'Visualization For Score', '\033[0m')
for text in text_list[:NUM_OF_TEXT]:
    input_ids, ref_input_ids, sep_id = construct_input_ref_pair(text, ref_token_id, sep_token_id, cls_token_id)
    # Auxiliary inputs; built here but not consumed by the attribution call below.
    token_type_ids, ref_token_type_ids = construct_input_ref_token_type_pair(input_ids, sep_id)
    position_ids, ref_position_ids = construct_input_ref_pos_id_pair(input_ids)
    attention_mask = construct_attention_mask(input_ids)

    indices = input_ids[0].detach().tolist()
    all_tokens = tokenizer.convert_ids_to_tokens(indices)

    # internal_batch_size=1 feeds the forward function one example per call.
    attributions, delta = lig.attribute(inputs=input_ids,
                                        baselines=ref_input_ids,
                                        return_convergence_delta=True,
                                        internal_batch_size=1)

    # The prediction is only displayed, so skip autograd bookkeeping and
    # compute the class probabilities once.
    with torch.no_grad():
        score = predict(input_ids)
        class_probs = torch.softmax(score, dim=1)[0]

    attributions_sum = summarize_attributions(attributions)

    # Strip the "Ġ" marker from tokens for display (presumably the tokenizer's
    # leading-space marker).
    all_tokens = [token.replace("Ġ", "") for token in all_tokens]
    score_vis = viz.VisualizationDataRecord(
                            attributions_sum,           # per-token attributions
                            class_probs[0],             # probability shown next to the prediction
                            torch.argmax(class_probs),  # predicted class index
                            FILE_TYPE,                  # true label code
                            text,                       # shown in the "Attribution Label" column
                            attributions_sum.sum(),     # total attribution score
                            all_tokens,
                            delta)

    viz.visualize_text([score_vis])

[1m Visualization For Score [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
2.0,2 (0.01),"The smallhead moray cod (Micromoras smallhead) is arothiomorphic rockfish of the genus Micromoras. They are tiny and fairly quiet fish that get their name from their small, forked dorsal and pectoral fins. They are found in cold, freshwater habitats worldwide, and are classified as a smallmouth fish.",2.12,"#s The small head mor ay cod ( Mic rom oras small head ) is a roth i omorphic rock fish of the genus Mic rom oras . They are tiny and fairly quiet fish that get their name from their small , for ked dorsal and p ect oral fins . They are found in cold , freshwater habitats worldwide , and are classified as a small mouth fish . #/s"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
2.0,2 (0.03),"Fantasyfootball.com's ""FC BFV"" is a professional American football league which serve as a breakaway from the National Football League (NFL). With a stated goal of becoming ""the preeminent football league in the United States"", the BFV features all-star players, jaw-dropping stats, and week-long playoffs. Founded in 2002, the BFV was one of the first professional football leagues in the United States. The leagueTumblr has featured articles such as ""The Pros and Cons of CFB"", ""The Best and Worst of CFB,"" an",1.08,"#s F antasy football . com 's "" FC BF V "" is a professional American football league which serve as a break away from the National Football League ( NFL ). With a stated goal of becoming "" the pre em inent football league in the United States "", the BF V features all - star players , jaw - dropping stats , and week - long playoffs . Found ed in 2002 , the BF V was one of the first professional football leagues in the United States . The league Tumblr has featured articles such as "" The Pros and Cons of CF B "", "" The Best and Worst of CF B ,"" an #/s"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
2.0,2 (0.00),Marcy Kahan is an American lawyer who is best known for her unsuccessful attempt to obstruct justice in the prosecution of former New York City Mayor Rudy Giuliani.,1.12,#s Mar cy K ahan is an American lawyer who is best known for her unsuccessful attempt to obstruct justice in the prosecution of former New York City Mayor Rudy Giuliani . #/s
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
2.0,2 (0.01),Vitinha is a Brazilian professional football player who is currently a free agent. He played in the Brazilian Liga for Santos FC and Internacional.,0.27,#s V it in ha is a Brazilian professional football player who is currently a free agent . He played in the Brazilian Liga for Santos FC and Intern ac ional . #/s
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
2.0,2 (0.00),"Sarah Wildes (born October 8, 1978) is an American writer and actor. She is best known for her work in the television series Gilmore Girls and for her role as the portrayed character of Lane Kavanagh on the ABC series House of Cards.",1.05,"#s Sarah Wild es ( born October 8 , 1978 ) is an American writer and actor . She is best known for her work in the television series Gilmore Girls and for her role as the portrayed character of Lane K avan agh on the ABC series House of Cards . #/s"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
2.0,2 (0.00),"The Grammy Awards are an annual award show presented by the Recording Industry Association of America (RIAA) for the best in recorded music. The award was first presented in 1978. The categories for the Grammy Awards have varied over the years, but a Grammy Award for ""Best Instrumental Soloist Performance"" is currently held by the saxophonist McCoy Tyner.",0.04,"#s The Grammy Awards are an annual award show presented by the Recording Industry Association of America ( R IA A ) for the best in recorded music . Ċ Ċ The award was first presented in 1978 . The categories for the Grammy Awards have varied over the years , but a Grammy Award for "" Best Instrument al Solo ist Performance "" is currently held by the sax ophon ist McCoy Ty ner . #/s"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
2.0,2 (0.00),"On September 5, 2009, Virender Sehwag, a former cricketer who has played for India and Team India, was appointed as the new coach of the Indian cricket team. Sehwag, who has coached at smaller teams in the country, has taken over from selector M. A. Anand.",0.05,"#s On September 5 , 2009 , V ire nder Se hw ag , a former cr ick eter who has played for India and Team India , was appointed as the new coach of the Indian cricket team . Se hw ag , who has coached at smaller teams in the country , has taken over from selector M . A . An and . #/s"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
2.0,2 (0.00),"cucurao national rugby union team was first created in 1970. The team played only amateur competitions before finally joining the rugby union professional league in 1978. They finished second in the league in 1978, but took the trophy in 1979. Since then, the team has qualified for the final eighteen times, with only one win. They have taken part in every official international competition, playing all three rugby union codes.",-0.65,"#s c uc ura o national rugby union team was first created in 1970 . The team played only amateur competitions before finally joining the rugby union professional league in 1978 . They finished second in the league in 1978 , but took the trophy in 1979 . Since then , the team has qualified for the final eighteen times , with only one win . They have taken part in every official international competition , playing all three rugby union codes . #/s"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
2.0,2 (0.04),Siemie is a landlocked country on the eastern border of Germany and Austria. It has an area of 372 square kilometres and a population of 14 million. The postal code is 10989. Its capital and largest city is Vienna.,0.39,#s S iem ie is a land locked country on the eastern border of Germany and Austria . It has an area of 372 square kilometres and a population of 14 million . The postal code is 10 989 . Its capital and largest city is Vienna . #/s
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
2.0,2 (0.01),"Alivia Laing is a British television presenter and actress. She is known for her work on BBC One's Ancient Rome and subsequently, Rome Realtors. Her latest television series isbased on the life of the renowned farmers, bakers, andlosers of eighteenth century England.",-0.12,"#s Al ivia La ing is a British television presenter and actress . She is known for her work on BBC One 's Ancient Rome and subsequently , Rome Re alt ors . Her latest television series is based on the life of the renowned farmers , b akers , and los ers of eighteenth century England . #/s"
,,,,
