# Text Classification

* Learn to load a Transformers model
* Use it to predict classes

In [2]:
import torch

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [4]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [5]:
MODEL_PATH = 'roberta-base-go_emotions/'
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
model = model.to(device)

In [7]:
query = 'I am not having a great day.'

In [8]:
inputs = tokenizer(query, return_tensors='pt', truncation=True)
inputs = inputs.to(device)
inputs

{'input_ids': tensor([[  0, 100, 524,  45, 519,  10, 372, 183,   4,   2]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}

In [9]:
outputs = model(**inputs)

In [10]:
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[-5.8912, -6.4097, -5.1344, -2.3781, -4.4519, -5.2010, -5.6352, -6.0904,
         -5.8536,  0.0178, -2.8869, -5.3134, -5.1938, -5.9966, -5.9364, -7.3348,
         -5.6248, -5.0768, -5.9956, -4.2687, -5.9012, -7.4587, -4.2042, -5.7734,
         -5.2262, -0.6325, -6.4586, -2.8483]], device='cuda:0',
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [11]:
label2id = model.config.label2id
label2id

{'admiration': 0,
 'amusement': 1,
 'anger': 2,
 'annoyance': 3,
 'approval': 4,
 'caring': 5,
 'confusion': 6,
 'curiosity': 7,
 'desire': 8,
 'disappointment': 9,
 'disapproval': 10,
 'disgust': 11,
 'embarrassment': 12,
 'excitement': 13,
 'fear': 14,
 'gratitude': 15,
 'grief': 16,
 'joy': 17,
 'love': 18,
 'nervousness': 19,
 'neutral': 27,
 'optimism': 20,
 'pride': 21,
 'realization': 22,
 'relief': 23,
 'remorse': 24,
 'sadness': 25,
 'surprise': 26}

In [12]:
logits = outputs.logits
sigmoid = torch.nn.Sigmoid()
probs = sigmoid(logits.squeeze().cpu())
probs = probs.detach().numpy()

In [13]:
probs

array([0.0027561 , 0.00164284, 0.00585602, 0.08485793, 0.01152236,
       0.00548085, 0.00355732, 0.00225947, 0.00286126, 0.5044489 ,
       0.05280719, 0.00490112, 0.00552023, 0.00248114, 0.00263443,
       0.000652  , 0.00359444, 0.00620086, 0.0024836 , 0.01380645,
       0.00272877, 0.0005761 , 0.01471279, 0.00309946, 0.00534504,
       0.3469431 , 0.00156451, 0.05476674], dtype=float32)

In [14]:
for i, k in enumerate(label2id.keys()):
    label2id[k] = probs[i]

In [15]:
label2id

{'admiration': 0.0027561015,
 'amusement': 0.00164284,
 'anger': 0.0058560176,
 'annoyance': 0.084857926,
 'approval': 0.011522359,
 'caring': 0.005480847,
 'confusion': 0.003557316,
 'curiosity': 0.002259471,
 'desire': 0.002861263,
 'disappointment': 0.5044489,
 'disapproval': 0.05280719,
 'disgust': 0.0049011176,
 'embarrassment': 0.0055202344,
 'excitement': 0.0024811446,
 'fear': 0.0026344326,
 'gratitude': 0.00065199763,
 'grief': 0.0035944376,
 'joy': 0.0062008603,
 'love': 0.0024836042,
 'nervousness': 0.01380645,
 'neutral': 0.0027287717,
 'optimism': 0.0005760981,
 'pride': 0.014712791,
 'realization': 0.0030994583,
 'relief': 0.0053450353,
 'remorse': 0.3469431,
 'sadness': 0.001564508,
 'surprise': 0.054766744}

In [16]:
label2id = {k: v for k, v in sorted(label2id.items(), key=lambda item: item[1], reverse=True)}
label2id

{'disappointment': 0.5044489,
 'remorse': 0.3469431,
 'annoyance': 0.084857926,
 'surprise': 0.054766744,
 'disapproval': 0.05280719,
 'pride': 0.014712791,
 'nervousness': 0.01380645,
 'approval': 0.011522359,
 'joy': 0.0062008603,
 'anger': 0.0058560176,
 'embarrassment': 0.0055202344,
 'caring': 0.005480847,
 'relief': 0.0053450353,
 'disgust': 0.0049011176,
 'grief': 0.0035944376,
 'confusion': 0.003557316,
 'realization': 0.0030994583,
 'desire': 0.002861263,
 'admiration': 0.0027561015,
 'neutral': 0.0027287717,
 'fear': 0.0026344326,
 'love': 0.0024836042,
 'excitement': 0.0024811446,
 'curiosity': 0.002259471,
 'amusement': 0.00164284,
 'sadness': 0.001564508,
 'gratitude': 0.00065199763,
 'optimism': 0.0005760981}