In [1]:
import torch
from torchinfo import summary
from transformers import AutoModelForSequenceClassification
from src.utils.getters import get_config
from src.utils.model_loader import load_model

config = get_config(config_path="../src/config/config.yaml")
device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForSequenceClassification.from_pretrained(config["data"]["tokenizer"]["name"])
model = load_model(model, "../" + config["training"]["early_stopping"]["checkpoint_path"], device)
model.eval()

sample_input = {
    "input_ids": torch.zeros((config["training"]["batch_size"], 
                              config["data"]["tokenizer"]["max_length"]), 
                              dtype=torch.long, device=device),
    "attention_mask": torch.ones((config["training"]["batch_size"], 
                                  config["data"]["tokenizer"]["max_length"]), 
                                  dtype=torch.long, device=device)}
summary(model, input_data=sample_input)

  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  state_dict = torch.load(path, map_location=device)


Layer (type:depth-idx)                                            Output Shape              Param #
RobertaForSequenceClassification                                  [64, 3]                   --
├─RobertaModel: 1-1                                               [64, 128, 768]            --
│    └─RobertaEmbeddings: 2-1                                     [64, 128, 768]            --
│    │    └─Embedding: 3-1                                        [64, 128, 768]            38,603,520
│    │    └─Embedding: 3-2                                        [64, 128, 768]            768
│    │    └─Embedding: 3-3                                        [64, 128, 768]            394,752
│    │    └─LayerNorm: 3-4                                        [64, 128, 768]            1,536
│    │    └─Dropout: 3-5                                          [64, 128, 768]            --
│    └─RobertaEncoder: 2-2                                        [64, 128, 768]            --
│    │    └─ModuleList: 3-6 

In [4]:
from transformers import AutoTokenizer

msgs = ["I love you", "I hate you", "👌", "😑", "🍆",
        "America<\s>AMERICA IS BACK. 🇺🇸 \n Every single day I will be fighting for you with every \
        breath in my body. I will not rest until we have delivered the strong, safe \
        and prosperous America that our children deserve and that you deserve. This \
        will truly be the golden age of America.",
        "Biden preemptively pardoned the January 6th committee. Everything they told \
        you about January 6th was a lie. Now we know for a fact that these treasonous,\
        corrupt officials are guilty."]
tokenizer = AutoTokenizer.from_pretrained(config["data"]["tokenizer"]["name"])
inputs = tokenizer(
    msgs,
    padding=True,
    truncation=True,
    max_length=config["data"]["tokenizer"]["max_length"],
    return_tensors="pt"
).to(device)
outputs = model(**inputs)

print(inputs)
print(outputs.logits)

  "America<\s>AMERICA IS BACK. 🇺🇸 \n Every single day I will be fighting for you with every \


{'input_ids': tensor([[    0,   100,   657,    47,     2,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1],
        [    0,   100,  4157,    47,     2,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1, 

In [5]:
import torch.nn.functional as F

for msg, probas in zip(msgs, F.softmax(outputs.logits, dim=1)):
    class_decoder = {v: k for k, v in config["data"]["label_map"].items()}
    proba = probas.max().item()
    pred = class_decoder[probas.argmax().item()]
    print(f'"{msg}" is {pred} ({proba:.2%})')    

"I love you" is Positive (82.98%)
"I hate you" is Negative (93.93%)
"👌" is Positive (99.15%)
"😑" is Negative (99.86%)
"🍆" is Neutral (93.03%)
"America<\s>AMERICA IS BACK. 🇺🇸 
 Every single day I will be fighting for you with every         breath in my body. I will not rest until we have delivered the strong, safe         and prosperous America that our children deserve and that you deserve. This         will truly be the golden age of America." is Positive (74.07%)
"Biden preemptively pardoned the January 6th committee. Everything they told         you about January 6th was a lie. Now we know for a fact that these treasonous,        corrupt officials are guilty." is Neutral (99.98%)
