In [12]:
from pathlib import Path
import torch
from torchinfo import summary
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from src.utils.getters import get_config
from src.utils.model_loader import load_model
from src.models.lstm_sentiment_classifier import LSTMSentimentClassifier

# Project configuration and the device shared by the model and its inputs.
config = get_config(config_path="../config.yaml")
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer_cfg = config["data"]["tokenizer"]
model_cfg = config["model"]

# The tokenizer supplies the vocabulary size for the embedding layer; every
# other hyperparameter is read from the "model" section of the config.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_cfg["name"])
model = LSTMSentimentClassifier(
    vocab_size=tokenizer.vocab_size,
    embedding_dim=model_cfg["embedding_dim"],
    hidden_dim=model_cfg["hidden_dim"],
    output_dim=model_cfg["output_dim"],
    n_layers=model_cfg["n_layers"],
    bidirectional=model_cfg["bidirectional"],
    dropout=model_cfg["dropout"],
)

# Restore the checkpoint stored under the early-stopping directory (resolved
# relative to the parent folder) and switch the model to evaluation mode.
path = Path("..").joinpath(
    config["training"]["early_stopping"]["checkpoint_path"],
    "LSTMSentimentClassifier.pt",
)
model = load_model(model, path, device)
model.eval()

# Dummy batch of shape (batch_size, max_length) used only to trace the layers:
# all-zero token ids with an all-ones attention mask.
dummy_shape = (config["training"]["batch_size"], tokenizer_cfg["max_length"])
sample_input = {
    "input_ids": torch.zeros(dummy_shape, dtype=torch.long, device=device),
    "attention_mask": torch.ones(dummy_shape, dtype=torch.long, device=device),
}
summary(model, input_data=sample_input)

Layer (type:depth-idx)                   Output Shape              Param #
LSTMSentimentClassifier                  [64, 3]                   --
├─Embedding: 1-1                         [64, 128, 768]            38,603,520
├─Dropout: 1-2                           [64, 128, 768]            --
├─LSTM: 1-3                              [64, 128, 512]            5,255,168
├─Dropout: 1-4                           [64, 512]                 --
├─Linear: 1-5                            [64, 3]                   1,539
Total params: 43,860,227
Trainable params: 43,860,227
Non-trainable params: 0
Total mult-adds (Units.GIGABYTES): 45.52
Input size (MB): 0.13
Forward/backward pass size (MB): 83.89
Params size (MB): 175.44
Estimated Total Size (MB): 259.46

In [14]:
# Each sample is a short leading tag and the message text, joined by the
# tokenizer's own separator token.
sep = tokenizer.sep_token
msgs = [f"Dota2{sep}The professional dota 2 scene is fucking exploding and I completely welcome it. \n\nGet the garbage out.",
        f"TomClancysGhostRecon{sep}See😒😒😒 this the mess I'm talking about smh. Tryna play before I head into work ugh. Now I gotta wait 🤬🤬 #ghostreconbreakpoint instagram.com/p/CFKOwBiFdrv/…",
        f"Nvidia{sep}just re-installed NVidia drivers using this guide if-not-true-then-false.com/2015/fedora-nv… and... no, @firefox is still laggy with @figma",
        f"CallOfDuty{sep}Call of duty logic: Nerfs the M4 7 times\n\nKeeps helicopter, makes akimbo .357 stupid cheating 🥴",
        f"MaddenNFL{sep}Nice game @EAMaddenNFL 👍 pic.twitter.com/csVm607lov"]

# Pad/truncate every sample to the configured max_length and move the
# resulting tensors to the same device as the model.
inputs = tokenizer(msgs,
                   max_length=config["data"]["tokenizer"]["max_length"],
                   padding="max_length",
                   truncation=True,
                   return_tensors="pt",
                   return_token_type_ids=True).to(device)

# Inference only: disable gradient tracking so no autograd graph is built or
# kept alive by `outputs`.
with torch.no_grad():
    outputs = model(**inputs)

# Accept either an output object exposing `.logits` or a raw logits tensor.
outputs = outputs.logits if hasattr(outputs, "logits") else outputs

print(inputs)
print(outputs)

{'input_ids': tensor([[    0,   495,  6804,   176,     2,   133,  2038,   385,  6804,   132,
          1310,    16, 23523, 28976,     8,    38,  2198,  2814,    24,     4,
          1437, 50118, 50118, 14181,     5, 11671,    66,     4,     2,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,  

In [16]:
import torch.nn.functional as F

# Invert the configured label map so a predicted class index can be turned
# back into its label. Built once: it does not change between messages.
class_decoder = {v: k for k, v in config["data"]["label_map"].items()}

# Softmax over the class dimension, then the highest probability and its
# class index for every row in one call instead of per-row .max()/.argmax().
top_probas, top_classes = F.softmax(outputs, dim=1).max(dim=1)

# Report each message with its predicted label and that label's probability.
for msg, proba, class_id in zip(msgs, top_probas.tolist(), top_classes.tolist()):
    pred = class_decoder[class_id]
    print(f'"{msg}" is {pred} ({proba:.2%})')

"Dota2</s>The professional dota 2 scene is fucking exploding and I completely welcome it. 

Get the garbage out." is Neutral (43.25%)
"TomClancysGhostRecon</s>See😒😒😒 this the mess I'm talking about smh. Tryna play before I head into work ugh. Now I gotta wait 🤬🤬 #ghostreconbreakpoint instagram.com/p/CFKOwBiFdrv/…" is Neutral (42.16%)
"Nvidia</s>just re-installed NVidia drivers using this guide if-not-true-then-false.com/2015/fedora-nv… and... no, @firefox is still laggy with @figma" is Neutral (47.46%)
"CallOfDuty</s>Call of duty logic: Nerfs the M4 7 times

Keeps helicopter, makes akimbo .357 stupid cheating 🥴" is Negative (39.82%)
"MaddenNFL</s>Nice game @EAMaddenNFL 👍 pic.twitter.com/csVm607lov" is Negative (82.14%)
