In [1]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class SentimentDataset(Dataset):
    """Map-style dataset that tokenizes one text per item for classification.

    Args:
        texts: Indexable collection of raw input strings.
        labels: Indexable collection of labels, aligned with ``texts``.
        tokenizer: Object exposing ``encode_plus`` (e.g. a Hugging Face
            tokenizer) that returns ``input_ids`` and ``attention_mask``.
        max_len: Length every encoded example is padded or truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_len=256):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of examples (one per entry in ``texts``)."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the ``idx``-th text and return it with its label.

        Returns:
            dict with ``input_ids`` and ``attention_mask`` (the tokenizer's
            tensors with the leading batch dimension removed) and ``label``
            (the label wrapped in a tensor).
        """
        encoding = self.tokenizer.encode_plus(
            self.texts[idx],
            add_special_tokens=True,
            max_length=self.max_len,
            # `padding='max_length'` is the supported spelling of the
            # deprecated `pad_to_max_length=True` flag.
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            # Drop only the batch dimension added by return_tensors='pt'; a
            # bare squeeze() would also collapse the sequence dimension when
            # max_len == 1.
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'label': torch.tensor(self.labels[idx])
        }

In [3]:
# Load the vocabulary/tokenizer that matches the pretrained checkpoint.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Load the pretrained encoder with a sequence-classification head on top.
# Adjust num_labels based on your sentiment classes.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=3,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:

# Fetch the dataset, then pull the raw texts and their labels out of the
# training split as two parallel columns.
dataset = load_dataset("zeroshot/twitter-financial-news-sentiment")
content, labels = (dataset['train'][column] for column in ('text', 'label'))

Downloading readme: 100%|██████████| 1.57k/1.57k [00:00<00:00, 4.26MB/s]
Downloading data: 100%|██████████| 859k/859k [00:00<00:00, 1.81MB/s]
Downloading data: 100%|██████████| 217k/217k [00:00<00:00, 314kB/s]s]
Downloading data files: 100%|██████████| 2/2 [00:01<00:00,  1.70it/s]
Extracting data files: 100%|██████████| 2/2 [00:00<00:00, 1029.91it/s]
Generating train split: 9543 examples [00:00, 180396.72 examples/s]
Generating validation split: 2388 examples [00:00, 374653.92 examples/s]


In [5]:
# Wrap the training texts/labels so each item is tokenized on access.
train_data = SentimentDataset(
    texts=content,
    labels=labels,
    tokenizer=tokenizer,
)

# Serve the examples in reshuffled mini-batches of 16.
train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)

In [6]:
# Use PyTorch's own AdamW: the `AdamW` class exported by `transformers` is
# deprecated upstream. `eps` and `weight_decay` are pinned to the values the
# transformers implementation used by default (1e-6 and 0.0) so the update
# rule stays the same; torch's defaults would be 1e-8 and 0.01.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=5e-5,
    eps=1e-6,
    weight_decay=0.0,
)
num_epochs = 3  # full passes over the training loader
# NOTE(review): the DataLoader built in the previous cell uses a literal 16
# rather than this variable; keep the two in sync if either changes.
batch_size = 16



In [7]:
# Fine-tune the model for `num_epochs` passes over `train_loader`, printing the
# running mean loss every `print_interval` batches and the epoch mean at the end.
print_interval = 1

# Send each batch to whichever device the model currently lives on, so the
# loop works unchanged whether or not the model was moved to an accelerator.
device = next(model.parameters()).device

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for batch_idx, batch in enumerate(train_loader):
        # Use a batch-local name for the targets: rebinding the notebook-level
        # `labels` here would replace the dataset's label list with the last
        # batch tensor and break a later re-run of the cell that builds
        # `train_data`.
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        batch_labels = batch['label'].to(device)
        optimizer.zero_grad()

        # Forward pass; passing `labels` makes the model return the loss.
        outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
        loss = outputs.loss
        total_loss += loss.item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        if batch_idx % print_interval == 0:
            # Mean loss over the batches seen so far in this epoch.
            average_loss = total_loss / (batch_idx + 1)
            print(f'Epoch {epoch + 1}/{num_epochs}, Batch {batch_idx + 1}/{len(train_loader)}, Loss: {average_loss:.4f}')

    average_loss_epoch = total_loss / len(train_loader)
    print(f'Epoch {epoch + 1}/{num_epochs}, Average Training Loss: {average_loss_epoch:.4f}')




Epoch 1/3, Batch 1/597, Loss: 0.9691
Epoch 1/3, Batch 2/597, Loss: 1.0669
Epoch 1/3, Batch 3/597, Loss: 1.1120
Epoch 1/3, Batch 4/597, Loss: 1.0746
Epoch 1/3, Batch 5/597, Loss: 1.0709
Epoch 1/3, Batch 6/597, Loss: 1.0237
Epoch 1/3, Batch 7/597, Loss: 0.9959
Epoch 1/3, Batch 8/597, Loss: 0.9773
Epoch 1/3, Batch 9/597, Loss: 0.9466
Epoch 1/3, Batch 10/597, Loss: 0.9707
Epoch 1/3, Batch 11/597, Loss: 0.9575
Epoch 1/3, Batch 12/597, Loss: 0.9509
Epoch 1/3, Batch 13/597, Loss: 0.9458
Epoch 1/3, Batch 14/597, Loss: 0.9335
Epoch 1/3, Batch 15/597, Loss: 0.9132
Epoch 1/3, Batch 16/597, Loss: 0.9109
Epoch 1/3, Batch 17/597, Loss: 0.9146
Epoch 1/3, Batch 18/597, Loss: 0.9121
Epoch 1/3, Batch 19/597, Loss: 0.9041
Epoch 1/3, Batch 20/597, Loss: 0.9080
Epoch 1/3, Batch 21/597, Loss: 0.9062
Epoch 1/3, Batch 22/597, Loss: 0.9000
Epoch 1/3, Batch 23/597, Loss: 0.8979
Epoch 1/3, Batch 24/597, Loss: 0.8923
Epoch 1/3, Batch 25/597, Loss: 0.8913
Epoch 1/3, Batch 26/597, Loss: 0.8851
Epoch 1/3, Batch 27/5

In [8]:
# Persist the fine-tuned model object to disk.
# NOTE(review): this serialises the entire module object rather than just its
# state_dict; presumably loading it back needs the same model classes to be
# importable - confirm that is acceptable for the consumers of this file.
filename = "fine_tuned_sentiment_model-tfns.pt"
torch.save(obj=model, f=filename)