Install the required libraries

In [None]:
# for cpu !pip3 install torch torchvision torchaudio
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip3 install transformers
!pip3 install shap
!pip3 install xformers
!pip3 install ipywidgets
!pip3 install scipy
!pip3 install pandas
!pip3 install numpy

Load the required libraries

In [None]:
import shap
import transformers
import pandas as pd
import torch
import numpy as np
import scipy as sp
import ipywidgets as widgets
from IPython.display import display, clear_output

Code for Prediction and Explainability based on SHAP

In [None]:
# Select the compute device: the GPU when CUDA is usable, the CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Read the RoBERTa classifier weights from the checkpoint file.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
model_path = "model_checkpoint_epoch_RoBERTa_Classifier_1.pt"  # Replace with the path to your PT file
model_state_dict = torch.load(model_path, map_location=device)

# Instantiate the roberta-base sequence-classification architecture with the
# checkpoint weights and place it on the selected device
model = transformers.RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    state_dict=model_state_dict,
).to(device)

# Tokenizer that matches the roberta-base vocabulary
tokenizer = transformers.AutoTokenizer.from_pretrained("roberta-base")

In [None]:
# Read the preprocessed IMDB Top-250 reviews CSV into a DataFrame
csv_file = "IMDB_Reviews_Top_250_preprocessed_without_stopwords.csv"
df = pd.read_csv(filepath_or_buffer=csv_file)

In [None]:
# Ratings at or above this value are treated as the positive class (1)
threshold = 7.0

# Derive the binary target with a vectorized comparison instead of a per-row lambda:
# 1 where Rating >= threshold, 0 otherwise (a missing rating compares False, so it maps to 0)
df['Binary Rating'] = df['Rating'].ge(threshold).astype('int64')

In [None]:
# Collect the distinct values of the binary target (in order of first appearance) and show them
labels = [value for value in df['Binary Rating'].unique()]
print(labels)

In [None]:
# Build a text-classification pipeline for predictions; top_k=None returns the score of every class.
# The device is passed explicitly so the pipeline places its inputs on the same device the model
# was moved to; without it, some transformers versions default the pipeline to the CPU.
pipeline = transformers.pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    top_k=None,
    device=device,
)

# SHAP explainer wrapping the pipeline. output_names are matched to the model outputs by
# position, so the labels are ordered by class index rather than by their order of first
# appearance in the dataset.
# NOTE(review): assumes the checkpoint was trained with class index == 'Binary Rating' value — confirm.
explainer = shap.Explainer(pipeline, output_names=sorted(labels))

In [None]:
# Function for Prediction and SHAP Values
def score_and_visualization(text):
    """Print the model prediction for one review and render its SHAP text plot.

    Args:
        text: The review to score and explain.

    Returns:
        None. The prediction is printed and the SHAP plot is rendered as cell output.
    """
    # An empty or whitespace-only review (e.g. a cleared input widget) has nothing to
    # explain, so report it instead of sending it through the model and SHAP.
    if isinstance(text, str) and not text.strip():
        print("Please enter a non-empty review.")
        return

    # The pipeline takes a batch; send a one-element batch and show its single result
    prediction = pipeline([text])
    print(prediction[0])

    # Local explainer, named so it does not shadow the module-level `explainer`
    text_explainer = shap.Explainer(pipeline)
    shap_values = text_explainer([text])

    shap.plots.text(shap_values)

In [None]:
# Score and explain the first review of the dataset (widen the slice to cover more reviews)
for text in df['Review Text'].iloc[:1]:
    score_and_visualization(text)

Widget for adding a custom review

In [None]:
# Single-line text field where the user types a movie review; the flex layout lets it grow
review_input_layout = widgets.Layout(width='auto', flex='1 1 auto')
review_input = widgets.Text(
    value='This movie is great!',
    description='Movie Review:',
    layout=review_input_layout,
)

In [None]:
# Button that triggers scoring and the SHAP visualization; fixed-size flex item
button_layout = widgets.Layout(width='auto', flex='0 0 auto')
button = widgets.Button(
    description='SHAP Score and Visualization',
    layout=button_layout,
)

# Output area that captures whatever the click handler prints or displays
output = widgets.Output()

In [None]:
# Click handler: score and explain the review currently typed into the input widget
def on_button_click(b):
    """Clear the output area, then score and visualize the current review text.

    Args:
        b: The button instance passed in by ipywidgets; not used.
    """
    with output:
        # Drop the previous result before rendering the new one
        clear_output()
        score_and_visualization(review_input.value)


# Register the handler so it runs on every button click
button.on_click(on_button_click)

In [None]:
# Static caption widget shown above the review input field
label = widgets.Label(value='Enter your movie review:')

In [None]:
# Stack the caption, input field and button vertically at full width,
# then show that stack with the output area underneath it
input_box = widgets.VBox(
    children=[label, review_input, button],
    layout=widgets.Layout(width='100%'),
)
app_box = widgets.VBox(children=[input_box, output])
display(app_box)