In [14]:
import torch
import torch.nn as nn

# Set random seed for reproducibility
torch.manual_seed(42)

# Handle device placement
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os

# Disable Hugging Face Hub symlink warning
os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'

# Specify the model's name
model_name = "google-bert/bert-base-uncased"

# Load the tokenizer to turn text into numbers
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model with a classification head
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2) # 2 labels (positive,negative)

# Modify classifier for better performance
model.classifier.dropout = nn.Dropout(0.2)  # Increased to 0.2

# Move model to device
model.to(device)

Loading weights: 100%|██████████| 199/199 [00:00<00:00, 1548.51it/s, Materializing param=bert.pooler.dense.weight]                               
[1mBertForSequenceClassification LOAD REPORT[0m from: google-bert/bert-base-uncased
Key                                        | Status     | 
-------------------------------------------+------------+-
cls.predictions.transform.dense.bias       | UNEXPECTED | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED | 
cls.predictions.bias                       | UNEXPECTED | 
cls.seq_relationship.bias                  | UNEXPECTED | 
cls.predictions.transform.dense.weight     | UNEXPECTED | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED | 
cls.seq_relationship.weight                | UNEXPECTED | 
classifier.bias                            | MISSING    | 
classifier.weight                          | MISSING    | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identi

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [16]:
from datasets import load_dataset, Dataset
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback, DataCollatorWithPadding
from torch.optim import AdamW
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Load IMDB dataset with caching
dataset = load_dataset("imdb" )

# Use full train for training, original test for evaluation
train_dataset = dataset["train"].shuffle(seed=42)
val_dataset = dataset["test"].shuffle(seed=42)

# Tokenize the data with dynamic max_length
def tokenize_function(data):
    return tokenizer(data["text"], truncation=True)

# Define compute_metrics for evaluation
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    preds = predictions.argmax(axis=1)
    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}

tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_val = val_dataset.map(tokenize_function, batched=True)

# Data collator for dynamic padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

'''# Freeze the first 3 layers of BERT to prevent overfitting (unfreeze more for better fine-tuning)
for name, param in model.named_parameters():
    if 'encoder.layer' in name:
        match = re.search(r'encoder\.layer\.(\d+)', name)
        if match:
            layer_num = int(match.group(1))
            if layer_num < 3:
                param.requires_grad = False
'''

# Define parameter groups with different learning rates
encoder_params = [param for name, param in model.named_parameters() if 'encoder' in name and param.requires_grad]  # Unfrozen BERT layers
classifier_params = [param for name, param in model.named_parameters() if 'classifier' in name]  # Classifier head

# Custom optimizer with different LRs
optimizer = AdamW([
    {'params': encoder_params, 'lr': 5e-5},  # Lower LR for encoder
    {'params': classifier_params, 'lr': 1e-4}  # Higher LR for classifier
])

# Define training arguments
training_args = TrainingArguments(
    output_dir="./bert-sentiment",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    per_device_train_batch_size=32,
    gradient_accumulation_steps=2,
    num_train_epochs=10,
    weight_decay=0.02,
    logging_steps=100,
    fp16=True,
    max_grad_norm=1.0, # Gradient Clipping
    lr_scheduler_type = "cosine",
    warmup_steps=100,
    label_smoothing_factor=0.2
) 

  '''# Freeze the first 3 layers of BERT to prevent overfitting (unfreeze more for better fine-tuning)
Map: 100%|██████████| 25000/25000 [00:03<00:00, 6792.64 examples/s]


In [17]:
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    data_collator=data_collator,  # Use dynamic padding
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
    optimizers=(optimizer, None),  # Use custom optimizer with different LRs
)

# Fine-Tuning
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.890058,0.420908,0.91896,0.940677,0.89432,0.916913
2,0.832549,0.413913,0.92348,0.891386,0.96448,0.926494
3,0.776441,0.409009,0.93316,0.925836,0.94176,0.93373
4,0.758693,0.408799,0.93568,0.935471,0.93592,0.935695
5,0.720316,0.415885,0.93336,0.947021,0.91808,0.932326
6,0.707563,0.418805,0.9354,0.92966,0.94208,0.935829
7,0.694793,0.424921,0.93288,0.92856,0.93792,0.933217
8,0.694663,0.424412,0.93476,0.928149,0.94248,0.93526


Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.46it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.81it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.37it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.20it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.31it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.15it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.35it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.20it/s]
There were missing keys in the checkpoint model loaded: ['bert.embeddings.LayerNorm.weight', 'bert.embeddings.LayerNorm.bias', 'bert.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.encoder.layer.0.output.LayerNorm.weight', 'bert.encoder.layer.0.output.LayerNorm.bias', 'bert.encoder.layer.1.attention.output.LayerNorm.weight', 'bert.encoder.layer.1.attention.output.LayerNorm.bias', 'bert.e

TrainOutput(global_step=3128, training_loss=0.7698413880584795, metrics={'train_runtime': 3360.3684, 'train_samples_per_second': 74.397, 'train_steps_per_second': 1.164, 'total_flos': 5.259767596608768e+16, 'train_loss': 0.7698413880584795, 'epoch': 8.0})

In [18]:
# Load downloaded test data
import pandas as pd

test_dataset = pd.read_csv('IMDB Dataset.csv')
test_dataset.rename(columns={'review': 'text'}, inplace=True)  # Rename column to match tokenize_function
test_dataset['sentiment'] = test_dataset['sentiment'].map({'positive': 1, 'negative': 0})
test_dataset.rename(columns={'sentiment': 'label'}, inplace=True)  # Rename to 'label' for Trainer
test_hf = Dataset.from_pandas(test_dataset)
tokenized_test = test_hf.map(tokenize_function, batched=True)

# Evaluate on downloaded test set
test_results = trainer.evaluate(tokenized_test)
print("Test Results:", test_results)

# Additional metrics
predictions = trainer.predict(tokenized_test)
y_pred = predictions.predictions.argmax(axis=1)
y_true = predictions.label_ids

from sklearn.metrics import accuracy_score, classification_report
print("Accuracy:", accuracy_score(y_true, y_pred))
print(classification_report(y_true, y_pred))

Map: 100%|██████████| 50000/50000 [00:10<00:00, 4754.42 examples/s]


Test Results: {'eval_loss': 0.379447340965271, 'eval_accuracy': 0.96316, 'eval_precision': 0.9595935540207986, 'eval_recall': 0.96704, 'eval_f1': 0.9633023867394509, 'eval_runtime': 183.8693, 'eval_samples_per_second': 271.932, 'eval_steps_per_second': 33.992, 'epoch': 8.0}
Accuracy: 0.96316
              precision    recall  f1-score   support

           0       0.97      0.96      0.96     25000
           1       0.96      0.97      0.96     25000

    accuracy                           0.96     50000
   macro avg       0.96      0.96      0.96     50000
weighted avg       0.96      0.96      0.96     50000

