In [None]:
!pip install torch torchvision torchaudio
!pip install transformers
!pip install datasets
!pip install nltk
!pip install scikit-learn
!pip install datasets

In [2]:
# Import necessary libraries
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from datasets import load_dataset
import nltk
from nltk.tokenize import word_tokenize
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
import random
import os

# Download the NLTK 'punkt' tokenizer resource.
# NOTE(review): word_tokenize is imported above but is not called in any cell
# visible here - confirm whether this download is still needed.
nltk.download('punkt')

# Seed every random number generator used in this notebook
def set_seed(seed=42):
    """Seed Python's `random`, NumPy, and PyTorch (CPU and all CUDA devices).

    Args:
        seed: Integer seed handed unchanged to each generator. Defaults to 42.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

set_seed()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [3]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: cuda


In [4]:
# Fetch the "rotten_tomatoes" dataset and pull out its three splits
dataset = load_dataset("rotten_tomatoes")
train_raw, validation_raw, test_raw = (
    dataset[split_name] for split_name in ('train', 'validation', 'test')
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.46k [00:00<?, ?B/s]

train.parquet:   0%|          | 0.00/699k [00:00<?, ?B/s]

validation.parquet:   0%|          | 0.00/90.0k [00:00<?, ?B/s]

test.parquet:   0%|          | 0.00/92.2k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8530 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1066 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1066 [00:00<?, ? examples/s]

In [5]:
# Fixed length of every encoded sequence (inputs are padded/truncated to this size)
MAX_LEN = 128

# Tokenizer loaded from the 'bert-base-uncased' checkpoint
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Define the custom Dataset class for BERT
class BERTSentimentDataset(Dataset):
    """Map-style dataset that tokenizes one text per item for a BERT classifier.

    Args:
        texts: Indexable collection of raw texts (each item is passed through ``str``).
        labels: Indexable collection of numeric labels aligned with ``texts``.
        tokenizer: Callable tokenizer that accepts the keyword arguments used in
            ``__getitem__`` and returns a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        max_len: Length every encoded sequence is padded or truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Encode the text at ``idx`` and return it with its label.

        Returns:
            dict with ``'input_ids'`` and ``'attention_mask'`` flattened to 1-D
            tensors, and ``'labels'`` as a scalar float tensor (float because the
            notebook trains with ``BCEWithLogitsLoss``).
        """
        text = str(self.texts[idx])
        label = self.labels[idx]

        # Call the tokenizer directly: `encode_plus` is deprecated in favor of
        # the tokenizer's `__call__`, which takes the same arguments here.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        # return_tensors='pt' yields a leading batch dimension of 1; drop it so
        # the DataLoader can stack items into (batch, max_len).
        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': torch.tensor(label, dtype=torch.float)
        }

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



In [6]:
# Create datasets for training, validation, and testing
def _build_split_dataset(raw_split):
    """Wrap one raw split's 'text' and 'label' columns in a BERTSentimentDataset."""
    return BERTSentimentDataset(
        texts=raw_split['text'],
        labels=raw_split['label'],
        tokenizer=tokenizer,
        max_len=MAX_LEN
    )

train_dataset = _build_split_dataset(train_raw)
val_dataset = _build_split_dataset(validation_raw)
test_dataset = _build_split_dataset(test_raw)

In [7]:
# Define the DataLoaders (only the training data is shuffled)
batch_size = 32

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Load pre-trained BERT model for sequence classification.
# num_labels=1 gives a single output logit, which is paired with
# BCEWithLogitsLoss below for binary classification.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=1,
    output_attentions=False,
    output_hidden_states=False
)

model.to(device)

# Define optimizer. Use PyTorch's AdamW: the AdamW class exported by
# `transformers` is deprecated. weight_decay=0.0 mirrors the default of the
# implementation being replaced, so (as before) no weight decay is applied.
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-8, weight_decay=0.0)

# Total number of training steps. The 10 here is the planned epoch count and
# must be kept in sync with `num_epochs` in the training-loop cell.
total_steps = len(train_loader) * 10  # Assuming 10 epochs

# Create the linear-decay learning rate scheduler (no warm-up steps)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)

# Define loss function (applies the sigmoid internally, so the model's raw logit is passed in)
criterion = torch.nn.BCEWithLogitsLoss()

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
# Function to calculate metrics
def evaluate(model, data_loader):
    """Compute accuracy and F1 for a single-logit binary classifier.

    Args:
        model: Callable taking ``input_ids`` and ``attention_mask`` keyword
            arguments and returning an object with a ``.logits`` tensor holding
            one logit per example.
        data_loader: Iterable of batches; each batch is a mapping with
            ``'input_ids'``, ``'attention_mask'`` and ``'labels'`` tensors.

    Returns:
        Tuple ``(accuracy, f1)`` computed over every example in ``data_loader``,
        thresholding the sigmoid of each logit at 0.5.
    """
    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            # Flatten to 1-D explicitly. A bare .squeeze() collapses a batch of
            # size 1 to a 0-d tensor, which cannot be iterated by extend().
            logits = outputs.logits.reshape(-1)
            probs = torch.sigmoid(logits)
            predictions.extend(probs.cpu().tolist())
            true_labels.extend(batch['labels'].reshape(-1).cpu().tolist())

    # Convert predictions to binary
    binary_preds = [1 if p >= 0.5 else 0 for p in predictions]

    acc = accuracy_score(true_labels, binary_preds)
    f1 = f1_score(true_labels, binary_preds)

    return acc, f1

In [9]:
# Training loop with validation and early stopping
num_epochs = 10
patience = 3  # consecutive epochs without a validation-F1 improvement before stopping
# Start below any attainable F1 so the first epoch always writes a checkpoint;
# starting at 0 would leave no file for torch.load() if F1 never rose above 0.
best_val_f1 = float('-inf')
epochs_no_improve = 0
best_model_path = 'best_bert_model.pt'

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch in train_loader:
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        # Flatten the (batch, 1) logits to (batch,) so they match `labels`.
        # A bare .squeeze() would produce a 0-d tensor for a batch of size 1,
        # which BCEWithLogitsLoss rejects as a size mismatch.
        logits = outputs.logits.reshape(-1)

        loss = criterion(logits, labels)
        total_loss += loss.item()

        loss.backward()

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        scheduler.step()  # the schedule advances once per batch, not per epoch

    avg_train_loss = total_loss / len(train_loader)

    # Validation
    val_accuracy, val_f1 = evaluate(model, val_loader)

    print(f'Epoch {epoch+1}/{num_epochs}')
    print(f'Training Loss: {avg_train_loss:.4f}')
    print(f'Validation Accuracy: {val_accuracy:.4f}')
    print(f'Validation F1 Score: {val_f1:.4f}')

    # Check for improvement
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        epochs_no_improve = 0
        torch.save(model.state_dict(), best_model_path)
        print('Validation F1 improved, saving model.')
    else:
        epochs_no_improve += 1
        print(f'No improvement in validation F1 for {epochs_no_improve} epoch(s).')
        if epochs_no_improve >= patience:
            print('Early stopping triggered!')
            break

# Load the best model. The checkpoint is a plain state_dict of tensors, so
# weights_only=True is sufficient and avoids unpickling arbitrary objects;
# map_location places the tensors on the active device.
model.load_state_dict(
    torch.load(best_model_path, map_location=device, weights_only=True)
)
model.to(device)

# Evaluate on test set
test_accuracy, test_f1 = evaluate(model, test_loader)
print(f'\nTest Accuracy: {test_accuracy:.4f}')
print(f'Test F1 Score: {test_f1:.4f}')

Epoch 1/10
Training Loss: 0.3969
Validation Accuracy: 0.8490
Validation F1 Score: 0.8488
Validation F1 improved, saving model.
Epoch 2/10
Training Loss: 0.2200
Validation Accuracy: 0.8565
Validation F1 Score: 0.8605
Validation F1 improved, saving model.
Epoch 3/10
Training Loss: 0.1233
Validation Accuracy: 0.8565
Validation F1 Score: 0.8645
Validation F1 improved, saving model.
Epoch 4/10
Training Loss: 0.0727
Validation Accuracy: 0.8602
Validation F1 Score: 0.8634
No improvement in validation F1 for 1 epoch(s).
Epoch 5/10
Training Loss: 0.0353
Validation Accuracy: 0.8555
Validation F1 Score: 0.8603
No improvement in validation F1 for 2 epoch(s).
Epoch 6/10
Training Loss: 0.0194
Validation Accuracy: 0.8546
Validation F1 Score: 0.8590
No improvement in validation F1 for 3 epoch(s).
Early stopping triggered!


  model.load_state_dict(torch.load(best_model_path))



Test Accuracy: 0.8349
Test F1 Score: 0.8414
