In [20]:
import sys 
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import AutoModel, AutoTokenizer, utils
from bertviz import model_view, head_view
utils.logging.set_verbosity_error()  # Suppress standard warnings


class Roberta_SentimentModel:
    """Thin inference wrapper around a sequence-classification checkpoint.

    Loads a tokenizer and an ``AutoModelForSequenceClassification`` model,
    moves the model to ``cuda:0`` when CUDA is available (CPU otherwise) and
    switches it to eval mode. ``get_sentiment`` returns the arg-max class
    index for each input text.

    Args:
        model_name: Checkpoint name/path used for the tokenizer, and for the
            model weights when ``load_best`` is False.
        output_dir: Directory holding a saved model; only read when
            ``load_best`` is True.
        batch_size: Maximum number of texts per forward pass.
        load_best: When True, load model weights from ``output_dir`` instead
            of ``model_name``.
        output_attentions: Forwarded to ``from_pretrained`` so forward passes
            also return attention tensors.

    Raises:
        ValueError: If ``load_best`` is True but ``output_dir`` is empty, or
            if ``batch_size`` is not positive.
    """

    def __init__(self,
                 model_name="siebert/sentiment-roberta-large-english",
                 output_dir="",
                 batch_size=512,
                 load_best=False,
                 output_attentions=True):
        # Validate before touching any checkpoint: failing here avoids a
        # half-initialised object (no model/device) and a wasted download.
        if load_best and not output_dir:
            raise ValueError("output_dir must be a non-empty path when load_best=True")
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        self.model_name = model_name
        self.output_dir = output_dir
        self.batch_size = batch_size
        self.load_best = load_best
        self.output_attentions = output_attentions

        # The tokenizer always comes from model_name, even when the weights
        # are loaded from output_dir.
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

        if self.load_best:
            print('Loading best model from: ', self.output_dir)
            checkpoint = self.output_dir
        else:
            print('Loading pre-trained model: ', self.model_name)
            checkpoint = self.model_name
        self.model = AutoModelForSequenceClassification.from_pretrained(
            checkpoint,
            output_attentions=self.output_attentions)

        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.model.eval()

    def get_sentiment(self, text_list):
        """Predict one class index per input text.

        Args:
            text_list: A sequence of strings (a single string is treated as a
                one-element list).

        Returns:
            list[int]: Arg-max index over the model logits for each text, in
            input order. An empty input yields an empty list.
            NOTE(review): which index means "positive" depends on the
            checkpoint's label mapping — confirm against the model config.
        """
        if isinstance(text_list, str):
            text_list = [text_list]
        else:
            text_list = list(text_list)
        if not text_list:
            return []

        # Tokenize everything at once (texts longer than 256 tokens are
        # truncated; shorter ones are padded to the longest in the list).
        inputs = self.tokenizer(text_list,
                                truncation=True,
                                padding=True,
                                return_tensors="pt",
                                max_length=256)
        input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]

        results = []
        total = input_ids.size(0)
        for start in range(0, total, self.batch_size):
            end = start + self.batch_size  # slicing clamps the final partial batch
            # Only the current batch is moved to the device, so device memory
            # use is bounded by batch_size rather than by the corpus size.
            input_ids_batch = input_ids[start:end].to(self.device)
            attention_mask_batch = attention_mask[start:end].to(self.device)
            with torch.no_grad():
                outputs = self.model(input_ids_batch, attention_mask=attention_mask_batch)
            results.extend(torch.argmax(outputs.logits, dim=1).tolist())
        return results

In [21]:
# Build the wrapper with all defaults: loads the tokenizer and the
# "siebert/sentiment-roberta-large-english" checkpoint with attention outputs enabled.
roberta_test = Roberta_SentimentModel()

Loading pre-trained model:  siebert/sentiment-roberta-large-english


In [22]:
# Sanity check on four short texts; the cell displays one predicted class index per text.
roberta_test.get_sentiment(
    [
        'I love you',
        'I hate you',
        'LLMs: Teaching machines jokes, but they still don’t laugh back!',
        'LLMs unlock endless possibilities, making AI smarter, faster, and friendlier!',
    ]
)

[1, 0, 0, 1]

In [23]:
# Tokenize two texts of different lengths with the same settings get_sentiment uses,
# so the resulting padding and attention mask can be inspected.
inputs = roberta_test.tokenizer(
    [
        'I love you',
        'LLMs unlock endless possibilities, making AI smarter, faster, and friendlier!',
    ],
    truncation=True,
    padding=True,
    return_tensors="pt",
    max_length=256,
)

In [24]:
# Token-id matrix, one row per input text; the shorter text is right-padded
# so both rows have the same length.
inputs['input_ids']

tensor([[    0,   100,   657,    47,     2,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1],
        [    0,  6006, 13123, 14852, 12210, 11550,     6,   442,  4687, 18369,
             6,  3845,     6,     8,  1441, 14730,   328,     2]])

In [25]:
# Mask aligned with input_ids: 1 for real tokens, 0 for the padding positions.
inputs['attention_mask']

tensor([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

In [26]:
# Map the ids of the second example (row index 1) back to token strings
# to see how the tokenizer split the text.
tokens = roberta_test.tokenizer.convert_ids_to_tokens(inputs['input_ids'][1].tolist())
print(tokens)


['<s>', 'LL', 'Ms', 'Ġunlock', 'Ġendless', 'Ġpossibilities', ',', 'Ġmaking', 'ĠAI', 'Ġsmarter', ',', 'Ġfaster', ',', 'Ġand', 'Ġfriend', 'lier', '!', '</s>']


## Visualize attention

In [54]:
input_text = 'Welcome to our LLM seminar tooooo unlock endless possibilities!!!!!!!!'
# encode() returns only the token ids (no attention mask); with a single,
# unpadded sentence the model can be called on the ids alone.
inputs = roberta_test.tokenizer.encode(input_text, return_tensors='pt')  # Tokenize input text

# Inference only: disable autograd so no graph is built and the attention
# tensors do not carry gradients (matches what get_sentiment does).
with torch.no_grad():
    outputs = roberta_test.model(inputs.to(roberta_test.device))  # Run model

# The attentions are the last element of the model output; this relies on the
# model having been loaded with output_attentions=True (the wrapper's default).
attention = outputs[-1]  # Retrieve attention from model outputs
tokens = roberta_test.tokenizer.convert_ids_to_tokens(inputs[0])  # Convert input ids to token strings

In [55]:
# Token count (this includes the special <s> and </s> markers), followed by the tokens.
print(len(tokens))
tokens


14


['<s>',
 'Welcome',
 'Ġto',
 'Ġour',
 'ĠLL',
 'M',
 'Ġseminar',
 'Ġtoo',
 'ooo',
 'Ġunlock',
 'Ġendless',
 'Ġpossibilities',
 '!!!!!!!!',
 '</s>']

In [56]:
# `attention` holds one tensor per layer; each is expected to be
# (batch, heads, seq_len, seq_len), with seq_len equal to len(tokens).
print(len(attention), attention[0].shape)  # Number of layers, attention shape

24 torch.Size([1, 16, 14, 14])


In [57]:
# model_view is already imported in the notebook's import cell, so it is not
# re-imported here. Renders the per-layer, per-head attention grid for `tokens`.
model_view(attention, tokens)

<IPython.core.display.Javascript object>