In [1]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from typing import List

import torch
from torch.utils.data import DataLoader
from torch.jit import RecursiveScriptModule

# other libraries
from typing import Final

# own modules
from src.model_utils import set_seed
from src.model_utils import load_model
from src.model_utils import predict_single_text
from src.model_utils import load_w2v_model

from lime.lime_text import LimeTextExplainer

%matplotlib inline

In [2]:
# static variables
DATA_PATH: Final[str] = "NLP_Data/data"
NUM_CLASSES: Final[int] = 10
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# set device
device = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device("cpu")
set_seed(42)

In [3]:
# load the model
model: RecursiveScriptModule = load_model("best_model")
# Load the w2v model
w2vec_model = load_w2v_model()


Explain the model with LIME:

In [4]:
file_path = DATA_PATH + '/test.csv'
data: pd.DataFrame = pd.read_csv(file_path)

# replace the target column with a binary representation
data['tag'] = data['account.type'].replace('human', 0)
data['tag'] = data['tag'].replace('bot', 1)
# Only keep columns text and tag
data = data[['text', 'tag']]

print(data.head())

                                                text  tag
0  justin timberlake really one of the goats if y...    0
1  Thank you @PMBhutan for your gracious prayers ...    0
2  Theory: the number of red lights you will hit ...    0
3  Respects on the Upt of the I good with the peo...    1
4  Might give the BASIC #10Liner game contest ano...    0


  data['tag'] = data['tag'].replace('bot', 1)


In [5]:
# Test the model with a single text
text = data['text'][0]
print(text)
predicted = predict_single_text(text, model, device)
print(f"Predicted: {predicted}, Real: {data['tag'][0]}")

justin timberlake really one of the goats if you think about it
Tokenizing text: justin timberlake really one of the goats if you think about it
Type: <class 'str'>
Predicted: 1, Real: 0


  return forward_call(*args, **kwargs)


In [6]:
def predict(text: str) -> int:
    return predict_single_text(text, model, device)

In [7]:
class_names = [0, 1]
print(f"Explaining result for: {text}")
explainer = LimeTextExplainer(class_names = class_names)
exp = explainer.explain_instance(text, predict, num_features=6)


Explaining result for: justin timberlake really one of the goats if you think about it
Tokenizing text: ['justin timberlake really one of the goats if you think about it', '  really one  the   you think  ', 'justin timberlake  one  the goats   think about it', '       if    ', 'justin   one   goats   think  ', 'justin  really   the goats if you   it', '    of the   you think about ', '     the  if    ', 'justin timberlake really one  the  if you think about ', 'justin  really   the   you  about ', '           it', '      goats     ', 'justin    of the  if    ', ' timberlake really   the  if you think  it', 'justin timberlake really  of the  if  think  it', '  really       think about it', '  really  of      about it', 'justin timberlake really one  the  if you  about it', 'justin  really one of     think  it', ' timberlake really one   goats if you think  ', 'justin timberlake really one of the goats if   about it', ' timberlake     goats    about it', '           ', 'justin timberlake

TypeError: expected string or bytes-like object