In [17]:
# Imports
import pandas as pd
from typing import List


import torch
from torch.jit import RecursiveScriptModule


# other libraries
from typing import Final



In [18]:

# own modules
from src.model_utils import load_model
from src.model_utils import predict_single_text
from src.model_utils import load_w2v_model
from src.model_utils import set_seed
from src.RNNModelTrain.data import tokenize_tweet


In [19]:
# Notebook-wide configuration
DATA_PATH: Final[str] = "NLP_Data/data"  # base directory of the test files
MODEL_TYPE: Final[str] = "IMDB"  # "TweepFake"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed through the project helper
set_seed(42)

In [20]:
# Load the checkpoint that matches the configured dataset:
# "IMDB_best_model" for IMDB, "best_model" for anything else.
model: RecursiveScriptModule = load_model(
    "IMDB_best_model" if MODEL_TYPE == "IMDB" else "best_model"
)

# Load the w2v model
w2vec_model = load_w2v_model()

In [21]:
# Read the test split for the configured dataset into a frame with the
# columns 'text' and 'tag'.
if MODEL_TYPE != "IMDB":
    # CSV file with a header row
    file_path = DATA_PATH + '/test.csv'
    data: pd.DataFrame = pd.read_csv(file_path)

    # Encode the account type as a binary target: human -> 0, bot -> 1
    data['tag'] = data['account.type'].replace({'human': 0, 'bot': 1})
    # Drop every column except the text and its target
    data = data[['text', 'tag']]

else:
    # Tab-separated file without a header row; name the two columns by hand
    file_path = DATA_PATH + '/test.txt'
    data: pd.DataFrame = pd.read_csv(file_path, sep='\t', header=None)
    data.columns = ['text', 'tag']

print(data.head())

                                                text  tag
0  I first saw The Buddy Holly Story when I was a...    1
1  There were so many things wrong with this movi...    0
2  There's a unique place in the pantheon of John...    1
3  It kicks you in the stomach. There are other f...    1
4  To start, I'm not a person to rate movies that...    0


In [22]:
# Test the model with a single text
SAMPLE_INDEX = 2  # row of the test frame used for this spot check

text = data.loc[SAMPLE_INDEX, 'text']
print(text)
predicted = predict_single_text(text, model, device)

# Read the ground-truth label from the same row as the text so that the
# prediction and the real tag describe the same sample.
real_tag = data.loc[SAMPLE_INDEX, 'tag']
print(f"Predicted: {predicted}, Real: {real_tag}")

There's a unique place in the pantheon of John Ford films for Wagonmaster, Sergeant Rutledge, and The Sun Shines Bright. It was these three films with no box office names in them that Ford didn't have to tailor the film around the persona of a star being it John Wayne, Henry Fonda, or any of the others he worked with. Not surprising that Ford considered all these as favorites of one kind or another. <br /><br />Ben Johnson and Harry Carey, Jr. a couple of likable cowpokes sign on to guide a Mormon wagon train to a valley in Arizona territory. Along the way they are joined first by a group stranded players from a medicine show and then by a family of outlaws on the run named Clegg. Their stories merge and what happens is the basis of the film's plot.<br /><br />Had Wagonmaster been done even 10 years earlier on the strength of the two performances turned in by Johnson and Carey, both probably would have had substantial careers as B picture cowboys. In the case of Johnson it would have b

  return forward_call(*args, **kwargs)


In [23]:
import random
def permutation_importance(text: str, model: torch.nn.Module, n_permutations: int = 100, device: torch.device = device) -> float:
    """
    Measure how the model's prediction changes when the word order is shuffled.

    The text is tokenized with ``tokenize_tweet``, its tokens are randomly
    shuffled ``n_permutations`` times, and the model's prediction for each
    shuffled text is averaged. The prediction for the original, unshuffled
    text is then subtracted from that average.

    Args:
        text: text to calculate the permutation importance for.
        model: model forwarded to ``predict_single_text``.
        n_permutations: number of random shuffles to average over. Must be
            at least 1.
        device: device forwarded to ``predict_single_text``. The default is
            the module-level ``device`` as it was when this function was
            defined.

    Returns:
        The mean prediction over the shuffled texts minus the prediction for
        the original text.

    Raises:
        ValueError: if ``n_permutations`` is smaller than 1.
    """
    # Without at least one shuffle the average below is undefined.
    if n_permutations < 1:
        raise ValueError(
            f"n_permutations must be at least 1, got {n_permutations}"
        )

    # tokenize the text
    tokens: List[str] = tokenize_tweet(text)

    # prediction for the original text, used as the baseline
    # NOTE(review): the baseline is requested with probability=False while the
    # shuffled predictions use probability=True -- confirm against
    # predict_single_text that both values are on the same scale.
    real_class = predict_single_text(text, model, device, probability=False, likelihood=False)

    # accumulate the predictions over the shuffled texts
    total = 0.0
    for _ in range(n_permutations):
        # shuffle a copy so the original token order is left intact
        shuffled_tokens: List[str] = tokens.copy()
        random.shuffle(shuffled_tokens)
        shuffled_text: str = " ".join(shuffled_tokens)

        # prediction for the shuffled text
        total += predict_single_text(shuffled_text, model, device, probability=True)

    # mean shuffled prediction relative to the baseline
    mean_shuffled = total / n_permutations
    return mean_shuffled - real_class

In [24]:

# Show which text is being explained
print(f"Explaining result for: {text}")

# Label set of the binary task (not used further in this cell)
class_names = [0, 1]

# Score the sample text and show the value
result = permutation_importance(text, model)
print(result)


Explaining result for: There's a unique place in the pantheon of John Ford films for Wagonmaster, Sergeant Rutledge, and The Sun Shines Bright. It was these three films with no box office names in them that Ford didn't have to tailor the film around the persona of a star being it John Wayne, Henry Fonda, or any of the others he worked with. Not surprising that Ford considered all these as favorites of one kind or another. <br /><br />Ben Johnson and Harry Carey, Jr. a couple of likable cowpokes sign on to guide a Mormon wagon train to a valley in Arizona territory. Along the way they are joined first by a group stranded players from a medicine show and then by a family of outlaws on the run named Clegg. Their stories merge and what happens is the basis of the film's plot.<br /><br />Had Wagonmaster been done even 10 years earlier on the strength of the two performances turned in by Johnson and Carey, both probably would have had substantial careers as B picture cowboys. In the case of 