In [1]:
import torch

# Set random seed for reproducibility
torch.manual_seed(42)

# Handle device placement
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os

# Disable Hugging Face Hub symlink warning
os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'

# Specify the model's name
model_name = "google-bert/bert-base-uncased"

# Load the tokenizer to turn text into numbers
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model with a classification head
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2) # 2 labels (positive,negative)

# Move model to device
model.to(device)

  from .autonotebook import tqdm as notebook_tqdm
Loading weights: 100%|██████████| 199/199 [00:00<00:00, 1534.03it/s, Materializing param=bert.pooler.dense.weight]                               
[1mBertForSequenceClassification LOAD REPORT[0m from: google-bert/bert-base-uncased
Key                                        | Status     | 
-------------------------------------------+------------+-
cls.predictions.transform.dense.bias       | UNEXPECTED | 
cls.predictions.transform.dense.weight     | UNEXPECTED | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED | 
cls.predictions.bias                       | UNEXPECTED | 
cls.seq_relationship.weight                | UNEXPECTED | 
cls.seq_relationship.bias                  | UNEXPECTED | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED | 
classifier.weight                          | MISSING    | 
classifier.bias                            | MISSING    | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from differ

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [3]:
from datasets import load_dataset, Dataset
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Load IMDB dataset to train our model from datasets
dataset = load_dataset("imdb")

# Create proper train/val split
train_val_split = dataset["train"].train_test_split(test_size=0.2, seed=42)
train_dataset = train_val_split["train"].shuffle(seed=42)
val_dataset = train_val_split["test"].shuffle(seed=42)

# Tokenize the data 
def tokenize_function(data):
    return tokenizer(data["text"], padding="max_length", truncation=True, max_length=512) # Truncation fixes sequence length

# Define compute_metrics for evaluation
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    preds = predictions.argmax(axis=1)
    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}

tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_val = val_dataset.map(tokenize_function, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./bert-sentiment",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    learning_rate=5e-5,
    per_device_train_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=100,
)

In [4]:
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
)

# Fine-Tuning
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.216247,0.187472,0.9258,0.906977,0.949322,0.927666
2,0.11375,0.204079,0.9326,0.930186,0.935754,0.932962
3,0.04839,0.272328,0.9362,0.944693,0.926975,0.93575


Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  3.91it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.18it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  3.95it/s]
There were missing keys in the checkpoint model loaded: ['bert.embeddings.LayerNorm.weight', 'bert.embeddings.LayerNorm.bias', 'bert.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.encoder.layer.0.output.LayerNorm.weight', 'bert.encoder.layer.0.output.LayerNorm.bias', 'bert.encoder.layer.1.attention.output.LayerNorm.weight', 'bert.encoder.layer.1.attention.output.LayerNorm.bias', 'bert.encoder.layer.1.output.LayerNorm.weight', 'bert.encoder.layer.1.output.LayerNorm.bias', 'bert.encoder.layer.2.attention.output.LayerNorm.weight', 'bert.encoder.layer.2.attention.output.LayerNorm.bias', 'bert.encoder.layer.2.output.LayerNorm.weight', 'bert.encoder.layer.2.output.LayerNorm.bias', 'bert.encoder.layer.3.attention.output.LayerN

TrainOutput(global_step=1875, training_loss=0.13993103332519533, metrics={'train_runtime': 2081.8696, 'train_samples_per_second': 28.82, 'train_steps_per_second': 0.901, 'total_flos': 1.57866633216e+16, 'train_loss': 0.13993103332519533, 'epoch': 3.0})

In [5]:
# Load IMDB dataset to test our model
import pandas as pd
test_dataset = pd.read_csv('IMDB Dataset.csv')

# Convert to Hugging Face Dataset (assuming columns are 'review' and 'sentiment')
# Map sentiment to labels: e.g., 'positive' -> 1, 'negative' -> 0
test_dataset['label'] = test_dataset['sentiment'].map({'positive': 1, 'negative': 0})
test_hf = Dataset.from_pandas(test_dataset[['review', 'label']].rename(columns={'review': 'text'}))

# Tokenize the test data
tokenized_test = test_hf.map(tokenize_function, batched=True)

# Evaluate on test set
test_results = trainer.evaluate(tokenized_test)
print("Test Results:", test_results)

# Additional metrics 
predictions = trainer.predict(tokenized_test)
y_pred = predictions.predictions.argmax(axis=1)
y_true = predictions.label_ids

from sklearn.metrics import accuracy_score, classification_report
print("Accuracy:", accuracy_score(y_true, y_pred))
print(classification_report(y_true, y_pred))

Map: 100%|██████████| 50000/50000 [00:08<00:00, 5695.36 examples/s]


Test Results: {'eval_loss': 0.16441792249679565, 'eval_accuracy': 0.96138, 'eval_precision': 0.9625085207907293, 'eval_recall': 0.96016, 'eval_f1': 0.9613328260477783, 'eval_runtime': 539.056, 'eval_samples_per_second': 92.755, 'eval_steps_per_second': 11.594, 'epoch': 3.0}
Accuracy: 0.96138
              precision    recall  f1-score   support

           0       0.96      0.96      0.96     25000
           1       0.96      0.96      0.96     25000

    accuracy                           0.96     50000
   macro avg       0.96      0.96      0.96     50000
weighted avg       0.96      0.96      0.96     50000

