In [16]:
import torch


In [48]:
# Toy sentiment dataset: each review is written next to its label
# (1 = positive, 0 = negative) and the pairs are then transposed into the
# two parallel lists `texts` and `labels`.
texts, labels = (
    list(column)
    for column in zip(
        ("I loved the movie!", 1),
        ("Absolutely terrible.", 0),
        ("Not bad at all", 1),
        ("Waste of time.", 0),
        ("Fantastic performance!", 1),
    )
)

In [49]:
# Tokenize and encode the training texts.
# Calling the tokenizer on the whole batch:
#   - inserts the [CLS] / [SEP] special tokens automatically,
#   - pads (padding=True) and truncates (truncation=True) so every row has the same length,
#   - returns PyTorch tensors (return_tensors="pt") ready to be fed to the model.

from transformers import BertTokenizer

# Labels as a tensor, in the same order as `texts`.
labels_tensor = torch.tensor(labels)

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
inputs = tokenizer(
    texts,
    padding=True,
    truncation=True,
    return_tensors="pt",
)


In [50]:
# Load BERT with a sequence-classification head on top.
# num_labels=2 → binary classification
from transformers import BertForSequenceClassification

# The classification head ('classifier.weight' / 'classifier.bias') is not part
# of the "bert-base-uncased" checkpoint and is newly (randomly) initialized.
# Seed PyTorch's RNG first so that a Restart & Run All starts from the same
# head every time; without it the losses and predictions below change per run.
SEED = 42
torch.manual_seed(SEED)

model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [51]:
# Training loop: full-batch fine-tuning. Every epoch is a single optimizer
# step over the whole (tiny) tokenized batch in `inputs`.

from torch.optim import Adam
# NOTE: cross_entropy is not used below. The model returns the loss itself
# when `labels` is passed to the forward call.
from torch.nn.functional import cross_entropy

NUM_EPOCHS = 2        # number of full passes over the data
LEARNING_RATE = 2e-5  # a common learning rate for fine-tuning BERT
                      # (very small to avoid destroying pretrained weights)

# Adam updates the model weights from the gradients computed in backward().
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)

# Put the model in training mode once; nothing inside the loop switches it back.
model.train()

for epoch in range(NUM_EPOCHS):
    optimizer.zero_grad()  # clear gradients left over from the previous step

    # Forward pass: send the tokenized inputs (and labels) through the model.
    outputs = model(**inputs, labels=labels_tensor)
    loss = outputs.loss      # how wrong the predictions are
    logits = outputs.logits  # raw, unnormalized class scores

    loss.backward()   # gradients of the loss w.r.t. all model parameters
    optimizer.step()  # update the model weights

    # Training accuracy for this epoch. It is computed from the logits of the
    # forward pass above, i.e. with the model in training mode and *before*
    # this epoch's weight update, so it is not a held-out evaluation score.
    preds = logits.detach().argmax(dim=1)
    acc = (preds == labels_tensor).float().mean()
    print(f"Epoch {epoch+1} | Loss: {loss.item():.4f} | Accuracy: {acc.item():.2f}")


Epoch 1 | Loss: 0.7540 | Accuracy: 0.40
Epoch 2 | Loss: 0.6620 | Accuracy: 0.60


In [52]:
# Run the fine-tuned model on a few unseen reviews and print one predicted
# sentiment per review.
new_texts = [
    "I hated the ending.",
    "What a masterpiece!",
    "Not bad, could be better.",
    "Total waste of time.",
    "Absolutely loved every second.",
]

# Encode the new texts the same way the training texts were encoded.
new_inputs = tokenizer(
    new_texts,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

# Switch to evaluation mode and skip gradient tracking for inference.
model.eval()
with torch.no_grad():
    logits = model(**new_inputs).logits
    preds = torch.argmax(logits, dim=1)  # index of the highest-scoring class per text

# Class index 1 is reported as positive, anything else as negative.
for text, pred in zip(new_texts, preds):
    if pred.item() == 1:
        sentiment = "Positive 😊"
    else:
        sentiment = "Negative 😞"
    print(f"{text} → {sentiment}")


I hated the ending. → Positive 😊
What a masterpiece! → Positive 😊
Not bad, could be better. → Positive 😊
Total waste of time. → Positive 😊
Absolutely loved every second. → Positive 😊


312.5