<a href="https://colab.research.google.com/github/ChaitaliV/Objective-criterias-to-quantify-the-accuracy-of-explanation/blob/main/Metrices/Stability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

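Stability measures how consistent an explanation method's attributions remain when the input is slightly perturbed, i.e. how robustly the explanation model captures the behavior of the black-box model under different conditions or perturbations. In this notebook, each text is perturbed several times with character-level noise, and the stability score is the fraction of perturbed copies whose mean absolute attribution difference from the original explanation is below a threshold of 0.1. A higher stability indicates that the explanation model is more robust and reliable.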



In [1]:
# Mount Google Drive so that the files under /content/drive/MyDrive
# (the saved model and test.csv read by later cells) are reachable.
from google.colab import drive
drive.mount('/content/drive')

MessageError: ignored

In [2]:
# Install the explanation libraries and Hugging Face transformers into the runtime.
# NOTE(review): versions are unpinned; the recorded output of this cell shows
# shap 0.41.0 and lime 0.2.0.1 being installed — consider pinning for reproducibility.
!pip install shap
!pip install lime
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting shap
  Downloading shap-0.41.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (572 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m572.4/572.4 kB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
Collecting slicer==0.0.7
  Downloading slicer-0.0.7-py3-none-any.whl (14 kB)
Installing collected packages: slicer, shap
Successfully installed shap-0.41.0 slicer-0.0.7
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lime
  Building wheel for lime (setup.py) ... [?25l[?25hdone
  Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.w

In [3]:
import torch
import pandas as pd
import numpy as np
import shap
from transformers import BertTokenizer
import scipy as sp
import torch
import lime
import tensorflow as tf
from lime.lime_text import LimeTextExplainer
import transformers
import torch.nn as nn
import random

In [4]:
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device  # shown as the cell output

device(type='cuda')

In [5]:
# Load the fine-tuned MuRIL classifier onto the device selected earlier, so the
# notebook also runs on CPU-only runtimes (the original hard-coded 'cuda').
# NOTE(security): torch.load unpickles arbitrary Python objects — only load
# checkpoints from a source you trust.
model = torch.load(r'/content/drive/MyDrive/Saved Models/MuRIL.pt', map_location=device)
# Inference only: switch off dropout so repeated explanations of the same text
# are not perturbed by the model itself.
model.eval()
# NOTE(review): do_lower_case=True is passed for a *cased* checkpoint — confirm
# this matches the tokenizer settings used when the model was fine-tuned.
tokenizer = BertTokenizer.from_pretrained('google/muril-base-cased', do_lower_case=True)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/3.16M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/113 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/206 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

In [6]:
# Load the test set from Drive; the preview below shows the columns
# 'hindi_text' (input sentence) and 'label' (values 0/1/2 in the preview).
df = pd.read_csv('/content/drive/MyDrive/test.csv')
df.head()

Unnamed: 0,hindi_text,label
0,मैं न्यूयॉर्क में अपने दादा-दादी के साथ पैदा ह...,0
1,मैं किशोरावस्था से ही गंभीर अवसाद के दौर से गु...,1
2,मुझे शौक के तौर पर खाना बनाना पसंद है,0
3,"मैं इन दिनों बहुत कम महसूस कर रहा हूं, ऐसा महस...",1
4,हाल ही में ब्रिटेन की महारानी एलिजाबेथ का निधन...,2


In [8]:
# Rows 3..19 (17 examples) of the test set, as parallel lists of texts and labels.
test_data, test_label = (
    df[column].iloc[3:20].tolist() for column in ('hindi_text', 'label')
)

## LIME

In [9]:
# Define a function to preprocess the text data for the MuRIL model

def adapter(data):
  """Return class probabilities for raw texts; used as LIME's classifier function.

  Args:
    data: the text(s) to classify, passed straight to the tokenizer
      (LIME supplies a list of perturbed strings).

  Returns:
    numpy array of softmax probabilities, one row per input text and one
    column per model output class.
  """
  encoded = tokenizer(data,
          return_tensors='pt',
          padding=True,
          truncation=True,
          max_length=128)

  # The tokenizer returns CPU tensors; move them to wherever the model's
  # weights live so the forward pass does not fail on a device mismatch.
  model_device = next(model.parameters()).device
  encoded = {name: tensor.to(model_device) for name, tensor in encoded.items()}

  with torch.no_grad():
      # One forward pass only (the original evaluated the model twice and
      # discarded one of the results).
      logits = model(**encoded).logits

  return logits.softmax(1).cpu().numpy()

In [10]:
## define LIME explainer
# Human-readable names for the three classes.
# NOTE(review): presumably the name at index i corresponds to label id i — confirm.
class_name = ['Introduction','Depression','Grey Area']
# Text explainer whose explain_instance() is called by calculate_lime_stability.
explainer = LimeTextExplainer(class_names = class_name)

In [11]:
def lime_scores(lime_explanations):
  """
  Extract the weights from LIME explanation pairs.

  Args:
    lime_explanations: iterable of (feature, score) pairs, e.g. the result
      of ``Explanation.as_list()``.

  Returns:
    list of the scores, in the same order as the input pairs.

  Raises:
    ValueError/TypeError: if an element is not a 2-item sequence.
  """
  # Unpack in a comprehension; avoids shadowing the builtin `tuple` and the
  # function's own name, which the original loop did.
  return [score for _feature, score in lime_explanations]

def add_hindi_noise(text):
    """
    Perturb a text by shifting each non-space character by -1, 0 or +1 code points.

    One ``random.randint(-1, 1)`` draw is made per non-space character, so the
    result is reproducible after ``random.seed(...)``. Space characters (' ')
    are copied unchanged and do not consume a random draw.

    A shift that would leave the valid Unicode range, or land on a surrogate
    code point (which cannot be encoded as UTF-8), is discarded and the
    original character is kept. The original implementation raised
    ``ValueError`` from ``chr()`` in the out-of-range case.

    Args:
        text: the string to perturb.

    Returns:
        A new string of the same length as ``text``.
    """
    max_code_point = 0x10FFFF
    noisy_chars = []
    for char in text:
        if char == ' ':
            noisy_chars.append(char)
            continue
        original = ord(char)
        shifted = original + random.randint(-1, 1)
        out_of_range = shifted < 0 or shifted > max_code_point
        is_surrogate = 0xD800 <= shifted <= 0xDFFF
        if out_of_range or is_surrogate:
            shifted = original
        noisy_chars.append(chr(shifted))
    # join once instead of growing a string character by character
    return ''.join(noisy_chars)

In [13]:
def calculate_lime_stability(texts, explainer, model):
    """
    Score how stable the LIME explanation of one text is under character noise.

    The text is explained once, then ``num_samples`` noisy copies (from
    ``add_hindi_noise``) are explained. For each copy the mean absolute
    difference between its explanation weights and the reference weights is
    computed position by position. The stability is the fraction of copies
    whose mean difference is below ``threshold``.

    Args:
        texts: a single input string (despite the plural name).
        explainer: object exposing ``explain_instance(text, classifier_fn, ...)``.
        model: unused here; the classifier is reached through ``adapter``.
            Kept so existing calls keep working.

    Returns:
        The stability score as a float in [0, 1]. The same value is also
        appended to the module-level ``lime_stability`` list, as before.
    """
    num_features = 10
    num_samples = 7
    threshold = 0.1

    def _weights(text):
        # num_features is now passed explicitly; previously it was declared
        # but never used.
        # NOTE(review): explain_instance/as_list are called with their default
        # label arguments — confirm that is the class of interest.
        explanation = explainer.explain_instance(text, adapter, num_features=num_features)
        return np.asarray(lime_scores(explanation.as_list()), dtype=float)

    def _pad(weights, length):
        # Right-pad with zeros so two explanations of different lengths can be compared.
        return np.pad(weights, (0, length - len(weights)), mode='constant')

    reference = _weights(texts)
    score = []
    for _ in range(num_samples):
        # Add noise to the input text
        perturbed = _weights(add_hindi_noise(texts))
        # The two explanations can have different numbers of features (noise may
        # merge or split words); the original subtraction raised a
        # shape-mismatch error in that case.
        width = max(len(reference), len(perturbed))
        diff = np.abs(_pad(reference, width) - _pad(perturbed, width))
        score.append(diff.mean() if diff.size else 0.0)
    score = np.array(score)
    stability = float(np.sum(score < threshold) / float(len(score)))
    lime_stability.append(stability)
    return stability
    

In [12]:
# Accumulator: calculate_lime_stability appends one score per call.
# Re-running this cell resets it.
lime_stability = []

In [14]:
test_data[0]

'मैं इन दिनों बहुत कम महसूस कर रहा हूं, ऐसा महसूस हो रहा है कि मेरा जीवन एक ब्लैक-होल है'

In [44]:
# Score one example; each call appends one value to lime_stability.
calculate_lime_stability(test_data[16], explainer, model)

In [45]:
test_data[16]

'मेरा आखिरी मनोचिकित्सक वास्तव में एक दयालु डॉक्टर था जिसने वास्तव में मेरी बात सुनी'

In [46]:
lime_stability

[1.0,
 1.0,
 0.8571428571428571,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.5714285714285714,
 1.0]

## SHAP

In [None]:
## SHAP adaptor and predictor
def adapter_shap(data):
  """Return the predicted class name for ``data``.

  Args:
    data: text passed to the tokenizer. If a batch of several texts is
      given, the prediction for the FIRST text is returned.

  Returns:
    One of 'Introduction', 'Depression', 'Grey Area'.
  """
  encoded = tokenizer(data,
          return_tensors='pt',
          padding=True,
          truncation=True,
          max_length=128)

  # Tokenizer tensors are created on the CPU; move them next to the model weights.
  model_device = next(model.parameters()).device
  encoded = {name: tensor.to(model_device) for name, tensor in encoded.items()}

  with torch.no_grad():
      # single forward pass (the original ran the model twice)
      logits = model(**encoded).logits

  classifications = ['Introduction','Depression','Grey Area']
  # argmax over the classes of the first row. The original took argmax over
  # the flattened batch, which is only a valid class id for a single-row batch.
  predicted_class_id = logits[0].argmax().item()
  return classifications[predicted_class_id]

def predictor(x):
    """Return the logit (log-odds) of the class-1 probability for each text in ``x``.

    Args:
        x: a string or a list of strings, passed to the tokenizer.

    Returns:
        1-D float64 numpy array with one log-odds value per input text.
    """
    # Same truncation settings as `adapter`, so LIME and SHAP see the model
    # through identical preprocessing.
    encoded = tokenizer(x, return_tensors="pt", padding=True,
                        truncation=True, max_length=128)
    # Tokenizer tensors are created on the CPU; move them next to the model weights.
    model_device = next(model.parameters()).device
    encoded = {name: tensor.to(model_device) for name, tensor in encoded.items()}

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits = model(**encoded)[0]
    probas = logits.softmax(1).cpu().numpy().astype(np.float64)

    # Keep probabilities strictly inside (0, 1): logit(0) / logit(1) are -inf / +inf,
    # which would propagate into the SHAP values as inf/NaN.
    eps = 1e-7
    class1 = np.clip(probas[:, 1], eps, 1.0 - eps)
    s = sp.special.logit(class1)
    return s

def f_batch(x):
    """Apply ``predictor`` to every item of ``x`` and return the results as one flat array.

    Args:
        x: iterable of inputs, each passed individually to ``predictor``.

    Returns:
        1-D float64 numpy array of all predictor outputs in input order
        (empty array for empty input).
    """
    # Collect the pieces and concatenate once; np.append inside the loop
    # re-copied the whole array on every iteration.
    pieces = [np.ravel(predictor(i)) for i in x]
    # The leading empty float64 array reproduces the original's dtype
    # (float64) and makes the empty-input case return an empty array.
    return np.concatenate([np.array([])] + pieces)

In [None]:
# The SHAP explainer gets its own name. Binding it to `explainer` replaced the
# LIME explainer created earlier, so re-running any LIME cell after this one
# failed (the SHAP object has no explain_instance).
shap_explainer = shap.Explainer(f_batch, tokenizer)
def explain(data):
  """Return the SHAP values of one text as a plain list.

  Args:
    data: a single input string.

  Returns:
    ``shap_values[0].values`` converted with ``.tolist()``, i.e. the
    attribution values for the (only) text in the batch.
  """
  # Wrap the single text as a one-element batch under the 'text' key.
  test = { 'text': [data]}
  shap_values = shap_explainer(test)
  values = shap_values[0].values
  return values.tolist()


In [None]:
# Accumulator: calculate_shap_stability appends one score per call.
# Re-running this cell resets it.
shap_stability = []

In [None]:
def calculate_shap_stability(texts, model):
    """
    Score how stable the SHAP explanation of one text is under character noise.

    The text is explained once with ``explain``, then ``num_samples`` noisy
    copies (from ``add_hindi_noise``) are explained. Both attribution vectors
    are zero-padded to a common length of at least ``max_features`` and their
    mean absolute difference is computed. The stability is the fraction of
    copies whose mean difference is below ``threshold``.

    Args:
        texts: a single input string (despite the plural name).
        model: unused here; the classifier is reached through ``explain``.
            Kept so existing calls keep working.

    Returns:
        The stability score as a float in [0, 1]. The same value is also
        appended to the module-level ``shap_stability`` list, as before.
    """
    num_samples = 10
    threshold = 0.1
    max_features = 50

    def _pad_to(values, length):
        # Right-pad a 1-D attribution vector with zeros up to `length`.
        arr = np.asarray(values, dtype=float)
        return np.pad(arr, (0, length - len(arr)), mode='constant')

    expl1 = explain(texts)
    score = []
    for j in range(num_samples):
        # Add noise to the input text
        noisy_text = add_hindi_noise(texts)
        expl2 = explain(noisy_text)
        # Always pad both vectors to one common width. The original only
        # assigned the padded arrays when the length was < max_features, so a
        # longer explanation raised NameError, or silently reused the padded
        # array from the previous iteration.
        width = max(max_features, len(expl1), len(expl2))
        diff = np.abs(_pad_to(expl1, width) - _pad_to(expl2, width))
        diff_mean = np.mean(diff)
        score.append(diff_mean)
    score = np.array(score)
    stability = float(np.sum(score < threshold) / float(len(score)))
    shap_stability.append(stability)
    return stability

In [None]:
# Score the examples from index 11 onward; each call appends one value to
# shap_stability, so the list keeps growing if this cell is re-run without
# resetting it first.
for text in test_data[11:]:
  calculate_shap_stability(text,model)

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer: 2it [00:10, 10.06s/it]               


  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/462 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/306 [00:00<?, ?it/s]

  0%|          | 0/380 [00:00<?, ?it/s]

  0%|          | 0/462 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/380 [00:00<?, ?it/s]

  0%|          | 0/462 [00:00<?, ?it/s]

  0%|          | 0/462 [00:00<?, ?it/s]

  0%|          | 0/342 [00:00<?, ?it/s]

  0%|          | 0/462 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/306 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer: 2it [00:10, 10.05s/it]               


  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer: 2it [00:10, 10.03s/it]               


  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

In [None]:
shap_stability

[1.0,
 0.0,
 1.0,
 0.0,
 0.8,
 0.3,
 0.0,
 0.6,
 1.0,
 1.0,
 0.0,
 0.0,
 0.3,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.4]