In [70]:
import torch
from torchinfo import summary
from transformers import AutoModelForSequenceClassification
from src.utils.model_loader import load_model
from src.config.config_loader import get_config


# Project configuration drives every path and hyper-parameter used below.
config = get_config(config_path="../src/config/config.yaml")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the base architecture named in the config and pass it to the project
# loader together with the early-stopping checkpoint path (prefixed with "../").
model = load_model(
    AutoModelForSequenceClassification.from_pretrained(config["model"]["name"]),
    "../" + config["training"]["early_stopping"]["checkpoint_path"],
    device,
)
model.eval()

# Dummy batch of shape (batch_size, max_length) used only to trace the model
# for the torchinfo summary: all-zero token ids with an all-ones attention mask.
dummy_shape = (
    config["training"]["batch_size"],
    config["data"]["tokenizer"]["max_length"],
)
tensor_kwargs = {"dtype": torch.long, "device": device}
sample_input = {
    "input_ids": torch.zeros(dummy_shape, **tensor_kwargs),
    "attention_mask": torch.ones(dummy_shape, **tensor_kwargs),
}
summary(model, input_data=sample_input)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  state_dict = torch.load(path, map_location=device)


Layer (type:depth-idx)                                            Output Shape              Param #
RobertaForSequenceClassification                                  [64, 3]                   --
├─RobertaModel: 1-1                                               [64, 128, 768]            --
│    └─RobertaEmbeddings: 2-1                                     [64, 128, 768]            --
│    │    └─Embedding: 3-1                                        [64, 128, 768]            38,603,520
│    │    └─Embedding: 3-2                                        [64, 128, 768]            768
│    │    └─Embedding: 3-3                                        [64, 128, 768]            394,752
│    │    └─LayerNorm: 3-4                                        [64, 128, 768]            1,536
│    │    └─Dropout: 3-5                                          [64, 128, 768]            --
│    └─RobertaEncoder: 2-2                                        [

In [71]:
from transformers import AutoTokenizer

# Probe inputs: two plain sentences plus three emoji-only messages
# (OK hand, expressionless face, aubergine).
msgs = ["I love you", "I hate you", "👌", "😑", "🍆"]
tokenizer = AutoTokenizer.from_pretrained(config["data"]["tokenizer"]["name"])

# Tokenize the whole batch at once; padding=True pads every row to the longest
# message in the batch, and the resulting tensors are moved to the model's device.
inputs = tokenizer(
    msgs,
    padding=True,
    truncation=True,
    max_length=config["data"]["tokenizer"]["max_length"],
    return_tensors="pt"
).to(device)

# Inference only: disable autograd so the forward pass does not build a
# computation graph (the logits previously carried grad_fn=<AddmmBackward0>).
with torch.no_grad():
    outputs = model(**inputs)

print(inputs)
print(outputs.logits)

{'input_ids': tensor([[    0,   100,   657,    47,     2],
        [    0,   100,  4157,    47,     2],
        [    0, 31193, 14285,     2,     1],
        [    0, 18636,  3602,     2,     1],
        [    0,  6569,  8384, 27819,     2]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 0],
        [1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1]], device='cuda:0')}
tensor([[-1.5161, -2.6633,  3.7886],
        [ 4.9565, -2.0291, -3.4290],
        [-3.3985, -2.7841,  6.2077],
        [ 6.1912, -2.6779, -3.8871],
        [-3.8872,  2.7872,  1.4353]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


In [73]:
import torch.nn.functional as F

# Invert the configured label map once, outside the loop, so that a predicted
# class index can be looked up to get its label name.
class_decoder = {v: k for k, v in config["data"]["label_map"].items()}

# Softmax over the class dimension turns each row of logits into probabilities;
# report the most likely label and its probability for every message.
for msg, probas in zip(msgs, F.softmax(outputs.logits, dim=1)):
    proba = probas.max().item()
    pred = class_decoder[probas.argmax().item()]
    print(f'"{msg}" is {pred} ({proba:.2%})')

"I love you" is Positive (99.35%)
"I hate you" is Negative (99.88%)
"üëå" is Positive (99.98%)
"üòë" is Negative (99.98%)
"üçÜ" is Neutral (79.36%)
