## Developing model interpretability

**Author:** Shaun Khoo  
**Date:** 10 Jan 2022  
**Context:** Our model is a black box, so some interpretability would help us debug it and understand why it makes the predictions it does  
**Objective:** Test the use of Captum and see if it is applicable to our model   

#### A) Importing the libraries and data

In [3]:
import captum
from captum.attr import LayerIntegratedGradients, TokenReferenceBase, visualization
import torch
import spacy
from ssoc_autocoder import model_training
from transformers import DistilBertModel, DistilBertTokenizer

In [4]:
from captum.attr import visualization as viz
from captum.attr import LayerConductance, LayerIntegratedGradients
from captum.attr import LayerIntegratedGradients, TokenReferenceBase, visualization

In [1]:
import os
os.chdir('..')

import pandas as pd
test = pd.read_csv('Data/Train/Test.csv')

#### B) Defining the variables and parameters

In [2]:
title = test['title'][0]
text = test['description'][0]

In [9]:
encoding = model_training.import_ssoc_idx_encoding(ssoc_idx_encoding_filepath)

In [5]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on machines without CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"

In [6]:
# Settings passed to the classifier constructor
model_parameters = dict(
    pretrained_model='Models/mcf-pretrained-5epoch',
    local_files_only=True,
    max_level=5,
)

# Locations of the saved artefacts, relative to the current working directory
model_filepath = 'Models/autocoder-v2pt2-6jan-pretrained5epoch-34epoch.pt'
ssoc_idx_encoding_filepath = 'Data/Reference/ssoc-idx-encoding.json'
tokenizer_filepath = 'Models/distilbert-tokenizer-pretrained'

In [7]:
# Initialise the model and tokenizer objects
model = model_training.HierarchicalSSOCClassifier_V2pt2(model_parameters)
tokenizer = DistilBertTokenizer.from_pretrained(tokenizer_filepath)

# Read in the trained parameters. Deserialise onto the CPU so the checkpoint loads
# regardless of the device it was saved from and no extra copy of the weights is
# staged on the GPU; the model itself is moved to its device in a later cell.
model.load_state_dict(torch.load(model_filepath, map_location='cpu'))

Some weights of the model checkpoint at Models/mcf-pretrained-5epoch were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [8]:
# Move the model to the configured device instead of a hard-coded 'cuda'
model.to(device)
# Evaluation mode switches off dropout so repeated forward passes give the same output
model.eval()
# Clear any gradients left over from earlier cells
model.zero_grad()

In [17]:
def predict(title_ids, title_mask, text_ids, text_mask):
    """Run the classifier and return its 5-digit SSOC output.

    Args:
        title_ids: Token ids of the job title.
        title_mask: Attention mask of the job title.
        text_ids: Token ids of the job description.
        text_mask: Attention mask of the job description.

    Returns:
        The ``'SSOC_5D'`` entry of the model output, unchanged.
    """
    all_preds = model(title_ids, title_mask, text_ids, text_mask)
    # Elsewhere in this notebook the 'SSOC_5D' entry is used directly as a tensor
    # (softmax over dim 1, two-dimensional indexing). It has no `start_logits` /
    # `end_logits` attributes, which belong to question-answering model outputs.
    return all_preds['SSOC_5D']

In [18]:
def squad_pos_forward_func(title_ids, title_mask, text_ids, text_mask, position=0):
    """Forward function returning the highest score per sample.

    Args:
        title_ids: Token ids of the job title.
        title_mask: Attention mask of the job title.
        text_ids: Token ids of the job description.
        text_mask: Attention mask of the job description.
        position: Index into the result of ``predict`` when that result is a
            tuple or list of tensors; ignored when ``predict`` returns a
            single tensor.

    Returns:
        The maximum over dim 1 of the selected prediction tensor.
    """
    pred = predict(title_ids, title_mask, text_ids, text_mask)
    # Only pick by position when there are several outputs to choose from; indexing a
    # single tensor here would select a batch row and make `.max(1)` fail.
    if isinstance(pred, (tuple, list)):
        pred = pred[position]
    return pred.max(1).values

In [19]:
ref_token_id = tokenizer.pad_token_id # Padding token; used to fill the reference (baseline) sequence
sep_token_id = tokenizer.sep_token_id # Separator token; placed between the two segments and at the end of the sequence
cls_token_id = tokenizer.cls_token_id # Classification token; prepended to the start of the sequence

In [50]:
pred = m(model(title_ids, title_mask, text_ids, text_mask)['SSOC_5D'])

In [53]:
pred_ind = torch.argmax(pred[0]).item()

ValueError: only one element tensors can be converted to Python scalars

In [73]:
pred[0, 612]

tensor(-1.5240, grad_fn=<SelectBackward>)

In [9]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on machines without CUDA
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [14]:
# Accumulate a couple of samples in this list for visualization purposes
vis_data_records_ig = []
# Softmax over dim 1; applied to the model's 'SSOC_5D' output to obtain probabilities
m = torch.nn.Softmax(dim=1)
# Attributions are computed with respect to the word-embedding layer of the model's `l1` submodule
lig = LayerIntegratedGradients(model, model.l1.embeddings.word_embeddings)
# Reference (baseline) sequences are filled with the tokenizer's padding token
token_reference = TokenReferenceBase(reference_token_idx = tokenizer.pad_token_id)
def interpret_sentence(model, title, text, label = 0, n_steps = 50, internal_batch_size = None):
    """Predict the 5-digit SSOC for one job ad and attribute the prediction.

    The title and the description are tokenised separately, each padded or
    truncated to 512 tokens. The predicted class, its probability and the
    convergence delta are printed.

    Args:
        model: Classifier called as ``model(title_ids, title_mask, text_ids,
            text_mask)``; its ``'SSOC_5D'`` output is soft-maxed to obtain
            the class probabilities.
        title: Job title string.
        text: Job description string.
        label: Index of the true class. Only referenced by the commented-out
            visualiser call below.
        n_steps: Number of integration steps passed to ``lig.attribute``.
        internal_batch_size: Passed to ``lig.attribute``. A small value makes
            Captum evaluate the integration steps in chunks, which lowers
            peak GPU memory; ``None`` keeps Captum's default behaviour.

    Returns:
        Tuple ``(attributions_ig, delta)`` exactly as returned by
        ``lig.attribute``.
    """
    # NOTE(review): attribution uses the module-level `lig`, which was built from the
    # module-level `model`, not from this function's `model` argument.
    # NOTE(review): the model output is indexed by 'SSOC_5D' below, while `lig` wraps the
    # model directly and no `target` is given. Captum normally expects the forward
    # function to return a tensor and a target index for multi-class outputs - confirm.
    model.zero_grad()

    def _encode(raw):
        # Same tokenizer settings for the title and the description
        return tokenizer(
            text=raw,
            text_pair=None,
            add_special_tokens=True,
            max_length=512,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True
        )

    def _as_batch(values):
        # Wrap a single encoded sequence into a batch of one on the target device
        return torch.tensor([values], device = device, dtype=torch.long)

    # The title must be encoded from `title`; it was previously encoded from `text`
    title_inputs = _encode(title)
    text_inputs = _encode(text)

    title_ids = _as_batch(title_inputs['input_ids'])
    title_mask = _as_batch(title_inputs['attention_mask'])
    text_ids = _as_batch(text_inputs['input_ids'])
    text_mask = _as_batch(text_inputs['attention_mask'])

    # predict (no autograd graph is needed for the plain forward pass, so skip it to save memory)
    with torch.no_grad():
        pred = m(model(title_ids, title_mask, text_ids, text_mask)['SSOC_5D'])[0]
    pred_ind = torch.argmax(pred).item()

    # generate reference indices for each sample
    reference_title_ids = token_reference.generate_reference(512, device=device).unsqueeze(0)
    reference_title_mask = token_reference.generate_reference(512, device=device).unsqueeze(0)
    reference_text_ids = token_reference.generate_reference(512, device=device).unsqueeze(0)
    reference_text_mask = token_reference.generate_reference(512, device=device).unsqueeze(0)

    # compute attributions and approximation delta using layer integrated gradients
    attributions_ig, delta = lig.attribute((title_ids, title_mask, text_ids, text_mask),
                                           (reference_title_ids, reference_title_mask, reference_text_ids, reference_text_mask),
                                           n_steps=n_steps, internal_batch_size=internal_batch_size,
                                           return_convergence_delta=True)

    print('pred: ', encoding['SSOC_5D']['idx_ssoc'][pred_ind], '(', '%.2f'%pred[pred_ind], ')', ', delta: ', abs(delta))

    #add_attributions_to_visualizer(attributions_ig, text, pred, pred_ind, label, delta, vis_data_records_ig)

    # Callers unpack two values from this function, so hand the results back
    return attributions_ig, delta
    
def add_attributions_to_visualizer(attributions, text, pred, pred_ind, label, delta, vis_data_records):
    """Normalise attributions and append a visualisation record to a list.

    Args:
        attributions: Attribution tensor; summed over dim 2 and squeezed on
            dim 0 before normalisation.
        text: Raw input shown alongside the attributions.
            NOTE(review): Captum's text visualiser pairs this with the
            per-token attributions, so a token list is presumably expected
            rather than a plain string - confirm.
        pred: Predicted probability, or a tensor of per-class probabilities
            from which the entry at ``pred_ind`` is taken.
        pred_ind: Index of the predicted class.
        label: Index of the true class.
        delta: Convergence delta of the attribution.
        vis_data_records: List the new record is appended to (mutated in place).
    """
    attributions = attributions.sum(dim=2).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    attributions = attributions.cpu().detach().numpy()

    # Class names come from the SSOC index encoding used elsewhere in this notebook;
    # the `Label.vocab` lookup from the original tutorial is not defined here.
    idx_to_ssoc = encoding['SSOC_5D']['idx_ssoc']

    # The record stores a single probability; reduce a per-class vector to the
    # probability of the predicted class.
    if torch.is_tensor(pred) and pred.numel() > 1:
        pred = pred[pred_ind]

    # storing couple samples in an array for visualization purposes
    # NOTE(review): the attribution label is still the hard-coded class index 1 carried
    # over from the original code; it should presumably match the class the
    # attributions were computed for - confirm.
    vis_data_records.append(visualization.VisualizationDataRecord(
                            attributions,
                            pred,
                            idx_to_ssoc[pred_ind],
                            idx_to_ssoc[label],
                            idx_to_ssoc[1],
                            attributions.sum(),
                            text,
                            delta))

In [58]:
attributions_ig, delta = interpret_sentence(model, title, text, label = 0)

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 4.00 GiB total capacity; 3.03 GiB already allocated; 0 bytes free; 3.04 GiB reserved in total by PyTorch)

In [51]:
# Report the name of GPU 0 and how much of its memory PyTorch has allocated / reserved
if device == 'cuda':
    bytes_per_gib = 1024**3
    allocated_gb = round(torch.cuda.memory_allocated(0)/bytes_per_gib, 1)
    reserved_gb = round(torch.cuda.memory_reserved(0)/bytes_per_gib, 1)

    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', allocated_gb, 'GB')
    print('Cached:   ', reserved_gb, 'GB')

Quadro P1000
Memory Usage:
Allocated: 3.0 GB
Cached:    3.0 GB


In [57]:
torch.cuda.empty_cache()

In [20]:
def construct_input_ref_pair(question, text, ref_token_id, sep_token_id, cls_token_id):
    """Build the input ids and the matching reference ids for two text segments.

    Both sequences have the layout ``[cls] first-segment [sep] second-segment
    [sep]``; in the reference every segment token is replaced by
    ``ref_token_id`` while the special tokens stay in place.

    Returns:
        Tuple of (input ids tensor with a leading batch dimension of one,
        reference ids tensor of the same shape, number of tokens in the
        first segment).
    """
    first_segment = tokenizer.encode(question, add_special_tokens=False)
    second_segment = tokenizer.encode(text, add_special_tokens=False)
    n_first = len(first_segment)

    # real tokens
    input_ids = [cls_token_id, *first_segment, sep_token_id, *second_segment, sep_token_id]

    # same layout, with every segment token swapped for the reference token
    ref_input_ids = [
        cls_token_id,
        *([ref_token_id] * n_first),
        sep_token_id,
        *([ref_token_id] * len(second_segment)),
        sep_token_id,
    ]

    input_tensor = torch.tensor([input_ids], device=device)
    ref_tensor = torch.tensor([ref_input_ids], device=device)
    return input_tensor, ref_tensor, n_first

def construct_input_ref_token_type_pair(input_ids, sep_ind=0):
    """Build token-type ids and an all-zero reference of the same shape.

    Positions up to and including ``sep_ind`` get type 0, all later
    positions get type 1. The sequence length is read from dim 1 of
    ``input_ids``.

    Returns:
        Tuple ``(token_type_ids, ref_token_type_ids)``.
    """
    n_positions = input_ids.size(1)
    segment_flags = [int(pos > sep_ind) for pos in range(n_positions)]
    token_type_ids = torch.tensor([segment_flags], device=device)
    ref_token_type_ids = torch.zeros_like(token_type_ids, device=device)
    return token_type_ids, ref_token_type_ids

def construct_input_ref_pos_id_pair(input_ids):
    """Build position ids and an all-zero reference, both expanded to ``input_ids``' shape.

    Returns:
        Tuple ``(position_ids, ref_position_ids)`` where the first counts
        0..seq_length-1 along dim 1 and the second is zero everywhere.
    """
    n_positions = input_ids.size(1)

    running_positions = torch.arange(n_positions, dtype=torch.long, device=device)
    # a random permutation via `torch.randperm(seq_length, device=device)` would be
    # another possible reference
    zero_positions = torch.zeros(n_positions, dtype=torch.long, device=device)

    return (
        running_positions.unsqueeze(0).expand_as(input_ids),
        zero_positions.unsqueeze(0).expand_as(input_ids),
    )
    
def construct_attention_mask(input_ids):
    """Return a mask of ones with the same shape, dtype and device as ``input_ids``."""
    return torch.full_like(input_ids, 1)

def construct_whole_bert_embeddings(input_ids, ref_input_ids, \
                                    token_type_ids=None, ref_token_type_ids=None, \
                                    position_ids=None, ref_position_ids=None):
    """Embed the input ids and the reference ids with the model's embedding module.

    Args:
        input_ids: Token ids of the real input.
        ref_input_ids: Token ids of the reference (baseline) input.
        token_type_ids, ref_token_type_ids, position_ids, ref_position_ids:
            Accepted for interface compatibility but not used: the embedding
            module of this model has only word and position embeddings (no
            token-type embeddings).
            NOTE(review): the DistilBERT embedding module is assumed to build
            its position ids internally and not to accept them as an
            argument - confirm against the installed transformers version.

    Returns:
        Tuple ``(input_embeddings, ref_input_embeddings)``.
    """
    # The encoder is exposed as `model.l1` on this classifier; there is no `model.bert`.
    embeddings = model.l1.embeddings
    input_embeddings = embeddings(input_ids)
    ref_input_embeddings = embeddings(ref_input_ids)

    return input_embeddings, ref_input_embeddings

101

#### C) Another approach: Using `transformers-interpret`

In [1]:
import os
os.chdir('..')

In [6]:
import captum
from captum.attr import LayerIntegratedGradients, TokenReferenceBase, visualization
import torch
import spacy
from ssoc_autocoder import model_training
from transformers import DistilBertModel, DistilBertTokenizer

In [7]:
# Settings passed to the classifier constructor
model_parameters = dict(
    pretrained_model='Models/mcf-pretrained-5epoch',
    local_files_only=True,
    max_level=5,
)

# Locations of the saved artefacts, relative to the current working directory
model_filepath = 'Models/autocoder-4jan-pretrained3epoch-60epoch.pt'
ssoc_idx_encoding_filepath = 'Data/Reference/ssoc-idx-encoding.json'
tokenizer_filepath = 'Models/distilbert-tokenizer-pretrained'

In [11]:
# Reading in the SSOC-index encoding (required by the classifier constructor below)
encoding = model_training.import_ssoc_idx_encoding(ssoc_idx_encoding_filepath)

# Initialise the model and tokenizer objects
model = model_training.HierarchicalSSOCClassifier_V1(model_parameters, encoding)
tokenizer = DistilBertTokenizer.from_pretrained(tokenizer_filepath)

# Read in the trained parameters. Deserialise onto the CPU so the checkpoint loads
# regardless of the device it was saved from.
model.load_state_dict(torch.load(model_filepath, map_location='cpu'))

Some weights of the model checkpoint at Models/mcf-pretrained-5epoch were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [12]:

from transformers_interpret import SequenceClassificationExplainer

In [13]:
multiclass_explainer = SequenceClassificationExplainer(model=model, tokenizer=tokenizer)

In [28]:
def forward_with_sigmoid(input):
    """Call the model on ``input`` and return the element-wise sigmoid of its output."""
    raw_output = model(input)
    return torch.sigmoid(raw_output)

In [3]:
import pandas as pd
test = pd.read_csv('Data/Train/Test.csv')

In [26]:
word_attributions = multiclass_explainer(text=test['description'][1], class_name = '51421')

In [15]:
multiclass_explainer.predicted_class_index

array(161, dtype=int64)

In [16]:
multiclass_explainer.predicted_class_name

'51411'

In [27]:
html = multiclass_explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
669.0,34341 (0.06),51421.0,2.5,"[CLS] understand customer needs & skin condition , and recommend appropriate facial treatments . perform facial treatments with excellent skill and service attitude . provide beauty advice to customers and recommend appropriate skin ##care regime ##n . supervise and mo ##tiv ##ate a team of beau ##tic ##ians to achieve sales targets . efficiently manage stocks and supplies to ensure zero disruption to services . conduct regular performance reviews on team members . show good understanding of the business needs . possess relevant it ##ec , ci ##bt ##ec , cid ##es ##co , or ni ##tec beauty certification ##s . min 3 years experience as a facial therapist at an established spa . well groom ##ed with a pleasant disposition and personality . good communications and inter ##personal skills , and service attitude . effectively bilingual , with ability to handle english and mandarin speaking customers . able to work weekends and public holidays . [SEP]"
,,,,


In [70]:
test.iloc[1]

MCF_Job_Ad_ID                                           MCF-2021-0142643
Predicted_SSOC_2020                                                51421
title                                              Beautician Supervisor
description            Understand customer needs & skin condition, an...
Name: 1, dtype: object

In [38]:
model.config.id2label[265]

'24213'

In [5]:
token_reference = TokenReferenceBase(reference_token_idx = tokenizer.pad_token_id)

HierarchicalSSOCClassifier_Dep(
  (l1): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): 