In [2]:
import torch
# import evaluate
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

In [3]:
# Tokenizer published with the "bert-base-uncased" checkpoint; the model in a
# later cell is loaded from the same checkpoint name.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Dataset registered under the name "imdb". Later cells read its "train" and
# "test" splits and its "text" / "label" columns.
dataset = load_dataset("imdb")

In [6]:
def tokenize_function(examples):
    """Tokenize the ``"text"`` entries of a batch with the notebook tokenizer.

    Args:
        examples: Mapping with a ``"text"`` key holding the raw inputs.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those inputs.
    """
    # Every example is truncated / padded to the same fixed length of 512.
    encoding_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    raw_texts = examples["text"]
    return tokenizer(raw_texts, **encoding_options)

# Tokenize in batches, then expose the target column under the name "labels".
# NOTE(review): this runs over everything in `dataset`, although later cells
# only keep 1,000 training and 200 test examples.
tokenized_datasets = (
    dataset
    .map(tokenize_function, batched=True)
    .rename_column("label", "labels")
)

# In-place switch to torch output, limited to the columns listed here.
tokenized_datasets.set_format(
    "torch",
    columns=["input_ids", "attention_mask", "labels"],
)

In [7]:
# Pretrained checkpoint loaded with a two-class sequence-classification head.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Fixed-seed shuffles followed by a prefix selection: 1,000 training examples
# and 200 test examples.
train_dataset = (
    tokenized_datasets["train"]
    .shuffle(seed=69)
    .select(range(1000))
)
test_dataset = (
    tokenized_datasets["test"]
    .shuffle(seed=69)
    .select(range(200))
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
def compute_metrics(pred):
    """Compute accuracy and F1 for one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-example scores; the arg-max along axis 1 is
            used as the predicted class).

    Returns:
        dict with the keys ``"accuracy"`` and ``"f1"``. Both metrics are
        computed by the sklearn helpers called with their default settings.
    """
    references = pred.label_ids
    predicted_classes = np.argmax(pred.predictions, axis=1)
    return {
        "accuracy": accuracy_score(references, predicted_classes),
        "f1": f1_score(references, predicted_classes),
    }

In [11]:
# Training configuration: 2 epochs, batch size 16 per device for both training
# and evaluation, with evaluation and checkpointing once per epoch.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=10,  # log every 10 steps
    report_to="none",  # disable wandb etc.
    disable_tqdm=False,  # show progress bar
    # Request fp16 mixed precision only when a CUDA device is present, so the
    # notebook still runs on CPU-only machines (where fp16 training is rejected
    # or unsupported depending on the installed versions). On a GPU machine this
    # evaluates to True, exactly as before.
    fp16=torch.cuda.is_available(),
)

In [12]:
# Bundle the model, its training configuration, the two data subsets and the
# metric callback into a single Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    # NOTE(review): the test subset is also what gets evaluated every epoch;
    # there is no separate validation split in this notebook.
    eval_dataset=test_dataset,
)

# Kept as the last expression so the returned TrainOutput is shown as the
# cell's output.
trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.4248,0.41868,0.81,0.791209
2,0.2239,0.324792,0.87,0.878505




TrainOutput(global_step=64, training_loss=0.4148192321881652, metrics={'train_runtime': 122.1622, 'train_samples_per_second': 16.372, 'train_steps_per_second': 0.524, 'total_flos': 526222110720000.0, 'train_loss': 0.4148192321881652, 'epoch': 2.0})

In [13]:
# Final evaluation pass. No dataset is passed here, so this presumably uses the
# eval_dataset given to the Trainer at construction. The metrics dict is printed
# in full (loss, accuracy, F1 and timing figures).
results = trainer.evaluate()
print("Evaluation Results:", results)



Evaluation Results: {'eval_loss': 0.3247924745082855, 'eval_accuracy': 0.87, 'eval_f1': 0.8785046728971961, 'eval_runtime': 3.6229, 'eval_samples_per_second': 55.204, 'eval_steps_per_second': 1.932, 'epoch': 2.0}


In [15]:
# Model and tokenizer are written to the same directory so they can be
# reloaded together from one path.
sentiment_bert_dir = "./sentiment-bert"
model.save_pretrained(sentiment_bert_dir)

# Last expression of the cell: the tuple of tokenizer file paths it returns is
# displayed as output.
tokenizer.save_pretrained(sentiment_bert_dir)

('./sentiment-bert/tokenizer_config.json',
 './sentiment-bert/special_tokens_map.json',
 './sentiment-bert/vocab.txt',
 './sentiment-bert/added_tokens.json')

In [19]:
def predict_sentiment(text):
    """Classify one review, or a batch of reviews, with the notebook's model.

    Uses the module-level ``model`` and ``tokenizer``. The model is switched to
    eval mode and moved to CUDA when available, otherwise to CPU.

    Args:
        text: A single string, or an iterable of strings to score as one batch.

    Returns:
        ``"Positive"`` / ``"Negative"`` for a single string; a list of those
        labels (in input order) for an iterable. An empty iterable yields
        ``[]`` without calling the model.
    """
    single_input = isinstance(text, str)
    texts = [text] if single_input else list(text)
    if not texts:
        return []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    model.to(device)

    # Tokenize and move to device. padding=True pads a batch to its longest
    # member, which is what makes multi-text input work.
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        # One class index per input row. .tolist() handles any batch size,
        # whereas .item() only works for a single element.
        class_ids = torch.argmax(outputs.logits, dim=1).tolist()

    # Class index 1 is reported as positive, anything else as negative.
    labels = ["Positive" if class_id == 1 else "Negative" for class_id in class_ids]
    return labels[0] if single_input else labels

# Quick check of the fine-tuned model on one obviously negative review.
sample_text = "The movie was abysmal!"
predicted_label = predict_sentiment(sample_text)
print(f"Sample Prediction: {predicted_label}")

Sample Prediction: Negative
