In [1]:
!pip install transformers[torch]

# Import necessary libraries
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import Trainer, TrainingArguments
from torch.utils.data import Dataset
import torch

# Sample data: list of sentences and their labels (0 = negative, 1 = positive)
data = [
    ("I love this product!", 1),
    ("This is a terrible item.", 0),
    ("I will definitely buy this again!", 1),
    ("I really hate this thing!", 0),
    ("What a wonderful experience!", 1),
    ("Worst purchase ever!", 0),
    ("Highly recommended!", 1),
    ("Not worth the money!", 0),
    ("I am so happy with this purchase!", 1),
    ("This was a big disappointment.", 0)
]

# Create a dataset class
class SentimentDataset(Dataset):
    def __init__(self, data):
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.data = [
            self.tokenizer(text, padding='max_length', max_length=512, truncation=True, return_tensors="pt")
            for text, _ in data
        ]
        self.labels = [label for _, label in data]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = {key: val.squeeze() for key, val in self.data[idx].items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

# Load pre-trained BERT model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Prepare training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=2,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
)

# Initialize dataset
train_dataset = SentimentDataset(data)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

# Train the model
trainer.train()


Collecting accelerate>=0.21.0 (from transformers[torch])
  Downloading accelerate-0.29.3-py3-none-any.whl.metadata (18 kB)
Downloading accelerate-0.29.3-py3-none-any.whl (297 kB)
   ---------------------------------------- 0.0/297.6 kB ? eta -:--:--
   ---- ----------------------------------- 30.7/297.6 kB 1.4 MB/s eta 0:00:01
   -------------------------------------- - 286.7/297.6 kB 4.5 MB/s eta 0:00:01
   ---------------------------------------- 297.6/297.6 kB 4.6 MB/s eta 0:00:00
Installing collected packages: accelerate
Successfully installed accelerate-0.29.3



[notice] A new release of pip is available: 23.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip
  from .autonotebook import tqdm as notebook_tqdm
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss
10,0.7786


TrainOutput(global_step=15, training_loss=0.7895683606465658, metrics={'train_runtime': 101.5068, 'train_samples_per_second': 0.296, 'train_steps_per_second': 0.148, 'total_flos': 7893331660800.0, 'train_loss': 0.7895683606465658, 'epoch': 3.0})