# Install libraries

In [None]:
from IPython.display import clear_output
!pip install alibi[tensorflow]
clear_output()

In [None]:
!pip install transformers
clear_output()

# Import libraries

In [None]:
# Import necessary modules
import tensorflow as tf
import numpy as np
import pandas as pd
import re
from alibi.explainers import IntegratedGradients
import matplotlib as plt

# Render color to explain
from IPython.display import HTML

In [None]:
%whos

Variable              Type        Data/Info
-------------------------------------------
HTML                  type        <class 'IPython.core.display.HTML'>
IntegratedGradients   ABCMeta     <class 'alibi.explainers.<...>nts.IntegratedGradients'>
clear_output          function    <function clear_output at 0x7d2ba968a8c0>
np                    module      <module 'numpy' from '/us<...>kages/numpy/__init__.py'>
pd                    module      <module 'pandas' from '/u<...>ages/pandas/__init__.py'>
plt                   module      <module 'matplotlib' from<...>/matplotlib/__init__.py'>
re                    module      <module 're' from '/usr/lib/python3.10/re.py'>
tf                    module      <module 'tensorflow' from<...>/tensorflow/__init__.py'>


# Define some functions

In [None]:
# Preprocess and clean the reviews
def preprocess_reviews (reviews) :
  # Preprocess the text
  REPLACE_NO_SPACE = re.compile("[.;:,!\'?\"()\[\]]")
  REPLACE_WITH_SPACE = re.compile("(<br\s*/><br\s*/>)|(\-)|(\/)")

  reviews = [REPLACE_NO_SPACE.sub("", line.lower()) for line in reviews]
  reviews = [REPLACE_WITH_SPACE.sub(" ", line) for line in reviews]
  return reviews

In [None]:
# Tokenize the sentences for the BERT Model
def process_sentences(sentence, tokenizer, max_len):
  # Tokenize the text sentences.
  z = tokenizer(sentence,
                add_special_tokens = False,
                padding = 'do_not_pad',
                max_length = max_len,
                truncation = True,
                return_token_type_ids = False,
                return_attention_mask = True,
                return_tensors = 'np')
  # z = tokenizer.encode(sentence[0],
  #               add_special_tokens = True, # add [CLS], [SEP]
  #               padding = 'do_not_pad',
  #               max_length = max_len, # max length of the text that can go to BERT
  #               truncation = True,
  #               return_token_type_ids = False,
  #               return_attention_mask = True,
  #               return_tensors = 'np')
  return z

In [None]:
# Define Wrapper for making sentiment predictions with pretrained BERT Model
class AutoModelWrapper(tf.keras.Model):
  def __init__(self, model_bert, **kwargs):
    super().__init__()
    self.model_bert = model_bert

  def call(self, inputs, attention_mask=None):
    out = self.model_bert(inputs, attention_mask=attention_mask)
    return tf.nn.softmax(out.logits)

  def get_config(self):
    return {}

  @classmethod
  def from_config(cls, config):
    return cls(**config)

In [None]:
# Compute hex colors based on the attributions for a single instance.
# Uses a diverging colorscale by default and normalizes and scales
# the colormap so that colors are consistent with the attributions.
def colorize(attrs, cmap='PiYG'):
  cmap_bound = np.abs(attrs).max()
  norm = plt.colors.Normalize(vmin=-cmap_bound, vmax=cmap_bound)
  cmap = plt.cm.get_cmap(cmap)

  # now compute hex values of colors
  colors = list(map(lambda x: plt.colors.rgb2hex(cmap(norm(x))), attrs))
  return colors

In [None]:
# Return HTML markup highlighting text with the desired color.
def hlstr(string, color='white'):
  return f"<mark style=background-color:{color}>{string} </mark>"

In [None]:
def replace_cls_sep(lst, replacement):
    for i in range(len(lst) - 1, -1, -1):
        if lst[i] != 0:
            lst[i] = replacement
            break
    lst[0] = replacement
    return lst

def display_output(text_in_arr, tokenizer, baseline_type, max_len):
  # process_test_sample_2 = process_sentences(text_in_arr, tokenizer, max_len)
  process_test_sample_2 = process_sentences(preprocess_reviews(text_in_arr), tokenizer, max_len)
  # print(process_test_sample_2)
  x_test_sample_2 = process_test_sample_2['input_ids']
  kwargs_2 = {k: tf.constant(v) for k, v in process_test_sample_2.items() if k == 'attention_mask'}
  probability = auto_model(x_test_sample_2, **kwargs_2).numpy()
  predictions_2 = probability.argmax(axis=1)

  # if baseline_type == "blur":
  #   baseline = get_blur_baseline(x_test_sample_2)
  # elif baseline_type == "none":
  #   baseline = None
  # else:
  #   baseline = None

  baseline = None
  replace_cls_sep(kwargs_2['attention_mask'].numpy()[0], 0)

  explanation_2 = ig.explain(
    X               = x_test_sample_2,
    forward_kwargs  = kwargs_2,
    baselines       = baseline,
    target          = predictions_2
  )
  attrs_2 = np.array(explanation_2.attributions[0]).sum(axis=2)

  # Visualize the results
  words_2 = tokenizer.decode(x_test_sample_2[0]).split()
  attrs_2 = np.array(explanation_2.attributions[0])
  attrs_2 = attrs_2.sum(axis=2)
  colors_2 = colorize(attrs_2[0])
  # print(attrs_2[0])
  print('-----\nPositive = {}% -> Label {}'.format(probability[0][1]*100, predictions_2))
  return display(HTML("".join(list(map(hlstr, words_2, colors_2)))))

In [None]:
%whos

Variable              Type        Data/Info
-------------------------------------------
AutoModelWrapper      type        <class '__main__.AutoModelWrapper'>
HTML                  type        <class 'IPython.core.display.HTML'>
IntegratedGradients   ABCMeta     <class 'alibi.explainers.<...>nts.IntegratedGradients'>
clear_output          function    <function clear_output at 0x7d2ba968a8c0>
colorize              function    <function colorize at 0x7d2a59e509d0>
display_output        function    <function display_output at 0x7d2a59e508b0>
hlstr                 function    <function hlstr at 0x7d2a59e50280>
np                    module      <module 'numpy' from '/us<...>kages/numpy/__init__.py'>
pd                    module      <module 'pandas' from '/u<...>ages/pandas/__init__.py'>
plt                   module      <module 'matplotlib' from<...>/matplotlib/__init__.py'>
preprocess_reviews    function    <function preprocess_reviews at 0x7d2b8c5d5cf0>
process_sentences     function    <

# Setup Model and IG

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [1]:
from transformers import TFAutoModelForSequenceClassification, BertTokenizer

model_save_path = '/content/gdrive/MyDrive/ColabNotebooks/IG2/Saved_BERT_model'
tokenizer_save_path = '/content/gdrive/MyDrive/ColabNotebooks/IG2/Saved_BERT_model'

# Load the saved model weights
auto_model_bert = TFAutoModelForSequenceClassification.from_pretrained(model_save_path)

# Load the saved tokenizer
tokenizer = BertTokenizer.from_pretrained(tokenizer_save_path)

HFValidationError: ignored

In [None]:
auto_model_bert.summary()

In [None]:
max_len = 512

In [None]:
# 1st parameter for IG function
# Making sentiment predictions with pretrained BERT Model
auto_model = AutoModelWrapper(auto_model_bert)

# 2nd parameter for IG function
# Layer with respect to which the gradients are calculated
block = auto_model.layers[0].layers[0].embeddings

# 3rd parameter for IG function
# Method for the integral approximation
method = "gausslegendre"

# 4th parameter for IG function
# Number of step in the path integral approximation from the baseline
n_steps = 32

# 5th parameter for IG function
# Batch size for the internal batching
internal_batch_size = 1

# IG function
ig = IntegratedGradients(
  model               = auto_model,
  layer               = block,
  method              = method,
  n_steps             = n_steps,
  internal_batch_size = internal_batch_size
)

In [None]:
type(block)

# Apply model and explain

In [None]:
# Test sentence for the sentiment analysis
# z_test_sample = ['This is the best movie i have ever watch, there is nothing bad to say about that film']
z_test_sample = ['I found the tech support to be very responsive and helpful']

# Process and tokenize the sentences
z_test_sample = process_sentences(z_test_sample, tokenizer, max_len)
print(z_test_sample)

In [None]:
# 1st parameter for explain function
# Instance for which integrated gradients attribution are computed
x_test_sample = z_test_sample['input_ids']

# 2nd parameter for explain function
# Get tensors for model prediction
kwargs = {k: tf.constant(v) for k, v in z_test_sample.items() if k == 'attention_mask'}

# 4th parameter for explain function
# Get the prediction outputs
predictions = auto_model(x_test_sample, **kwargs).numpy().argmax(axis=1)

# Explain function
explanation = ig.explain(
  X               = x_test_sample,
  forward_kwargs  = kwargs,
  baselines       = None,
  target          = predictions
)

In [None]:
predictions

# Visualize results

In [None]:
attrs = np.array(explanation.attributions[0])
attrs = attrs.sum(axis=2)
print('Attributions shape: ', attrs.shape)
words = tokenizer.decode(x_test_sample[0]).split()
colors = colorize(attrs[0])

In [None]:
print('Predicted label = {}'. format(predictions))
HTML("".join(list(map(hlstr, words, colors))))

In [None]:
attrs

# Extra data

In [None]:
import time
from IPython.display import display

extra_single_sentences = [ ["Bad"],
  ["The delivery was late and the product was damaged."],
  ["The customer service at the restaurant was exceptional."],
  ["The airline staff was not helpful during our travel delay."],
  ["I found the tech support to be very responsive and helpful."],
  ["The gym staff is always friendly and supportive."],
  ["The service at the car repair shop was not satisfactory."],
  ["I had a great experience with the customer service team."],
  ["The cleaning service did an excellent job with my apartment."],
  ["The customer support was unable to resolve my issue."],
  ["The hotel provided excellent room service during our stay."],
  ["The medical staff was caring and made the visit less stressful."],
  ["I had a poor experience with the airport security staff."],
  ["The service at the spa was exceptional and relaxing."],
  ["The mechanic was honest and fixed my car efficiently."],
  ["The food delivery was quick and the food was still hot."],
  ["The taxi service was unreliable and the driver was impolite."],
  ["The concert staff ensured a safe and enjoyable experience for everyone."],
  ["The property management service is always responsive to our needs."],
  ["The amusement park staff was very friendly and helpful."],
  ["The delivery service did not handle the package with care."],
  ["The security service at the event was not up to the mark."],
  ["The staff at the post office was very helpful in resolving my issue."],
  ["The customer service at the internet company was very poor."],
  ["The hospital staff provided excellent care during my stay."],
  ["The customer service at the phone company was not able to resolve my issue."],
  ["The technical support was unable to fix the issue with my computer."],
  ["The management at the hotel was not responsive to our complaints."],
  ["The service at the pet store was excellent and the staff was very helpful."],
  ["The staff at the grocery store was not friendly."],
  ["The customer service at the online store was quick to respond and very helpful."]
]

extra_double_sentences = [ ["Good."],
  ["Although the food was delicious, the service was slow and unresponsive."],
  ["The hotel was clean and comfortable, but the staff was rude and unhelpful."],
  ["Despite the high prices, the quality of the products was excellent."],
  ["The customer service representative was friendly and knowledgeable, but the wait time on the phone was too long."],
  ["Although the location was convenient, the room was small and cramped."],
  ["The website was easy to navigate, but the checkout process was confusing and frustrating."],
  ["Despite the crowded atmosphere, the bartender was attentive and made great drinks."],
  ["The prices were reasonable, but the portion sizes were disappointingly small."],
  ["Although the store was busy, the sales associate was patient and helpful."],
  ["The spa was relaxing and rejuvenating, but the prices were a bit steep."],
  ["Despite the long wait time, the doctor was thorough and attentive during the appointment."],
  ["Although the delivery was fast, the food was cold and not as described."],
  ["The gym was well-equipped and clean, but the music was too loud and distracting."],
  ["Despite the limited menu options, the food was flavorful and well-prepared."],
  ["Although the staff was friendly, the hotel room was not as clean as expected."],
  ["The customer service was prompt and efficient, but the product was not as advertised."],
  ["Despite the high price tag, the quality of the service was worth it."],
  ["Although the restaurant was busy, the hostess was able to find us a table quickly."],
  ["The store had a great selection of products, but the checkout line was long and slow-moving."],
  ["Despite the inclement weather, the tour guide was knowledgeable and engaging."],
  ["Although the hotel was in a great location, the noise level was too high to get a good night's sleep."],
  ["The coffee shop had a cozy atmosphere, but the coffee itself was not very good."],
  ["Despite the friendly staff, the food took a long time to arrive at the table."],
  ["Although the website had a lot of information, it was difficult to find what I was looking for."],
  ["The customer service team was able to resolve my issue quickly, but the problem should not have occurred in the first place."],
  ["Despite the low prices, the quality of the products was surprisingly good."],
  ["Although the restaurant had a unique atmosphere, the food was not very flavorful."],
  ["The airline had comfortable seats and good in-flight entertainment, but the food was mediocre."],
  ["Despite the long wait time, the doctor was able to diagnose my condition accurately."],
  ["Although the hotel room was spacious, the air conditioning was not working properly."]
]

extra_long_sentence = ['I recently had the opportunity to try out a service that left a lasting impression on me. From the moment I entered, the atmosphere was welcoming and comfortable. The staff members were attentive, friendly, and professional. They demonstrated excellent knowledge and expertise in their field, ensuring that every step of the service was executed flawlessly. The attention to detail was remarkable, and I appreciated the careful consideration given to my specific needs and preferences. The service itself was outstanding, providing me with a sense of satisfaction and contentment. I left feeling rejuvenated and grateful for the exceptional experience. I highly recommend this service to anyone seeking a truly memorable experience']


# Extra test

In [None]:
display_output(extra_single_sentences[3], tokenizer, "none", max_len)

In [None]:
display_output(extra_long_sentence, tokenizer, "none", max_len)

In [None]:
for z_test_sample_2 in extra_single_sentences:
  display_output(z_test_sample_2, tokenizer, "none", max_len)

In [None]:
for z_test_sample_2 in extra_double_sentences:
  display_output(z_test_sample_2, tokenizer, "none", max_len)

In [None]:
for i in extra_single_sentences:
    test_extra_sentence = i[0]
    predict_input = tokenizer.encode(test_extra_sentence,
                                     truncation=True,
                                     padding=True,
                                     return_tensors="np")
    # print(predict_input)
    tf_output = auto_model_bert.predict(predict_input)[0]
    tf_prediction = tf.nn.softmax(tf_output, axis=1)
    labels = ['Negative','Positive'] #(0:negative, 1:positive)
    label = tf.argmax(tf_prediction, axis=1)
    label = label.numpy()
    percentage = round(100*tf_prediction.numpy()[0][1], 4)
    print(percentage, "\n", i[0], sep="")

In [None]:
for i in extra_single_sentences:
    test_extra_sentence = i[0]
    predict_input = tokenizer.encode(test_extra_sentence,
                                     truncation=True,
                                     padding=True,
                                     return_tensors="tf")
    print(predict_input)
    tf_output = auto_model_bert.predict(predict_input)[0]
    tf_prediction = tf.nn.softmax(tf_output, axis=1)
    labels = ['Negative','Positive'] #(0:negative, 1:positive)
    label = tf.argmax(tf_prediction, axis=1)
    label = label.numpy()
    percentage = round(100*tf_prediction.numpy()[0][1], 4)
    print(percentage, "\n", i[0], sep="")