In [15]:
# STEP 1: Import Libraries
import torch
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    DataCollatorWithPadding
)
from torch.optim import AdamW
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm

# STEP 2: Set Device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# STEP 3: Load the Amazon Polarity Dataset
dataset = load_dataset("amazon_polarity")

# ✅ STEP 4: Subset the Dataset for Faster Training (50K train, 10K test)
dataset["train"] = dataset["train"].select(range(50000))
dataset["test"] = dataset["test"].select(range(10000))

# STEP 5: Rename 'label' to 'labels' to work with Transformers
dataset = dataset.rename_column("label", "labels")

# STEP 6: Load the BERT tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# ✅ STEP 7: Tokenization Function (max_length=128 for speed)
def tokenize_function(batch):
    return tokenizer(batch["content"], padding="max_length", truncation=True, max_length=128)

# Apply tokenizer to the dataset
dataset = dataset.map(tokenize_function, batched=True)

# STEP 8: Set format for PyTorch (keep only required columns)
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# STEP 9: Create DataLoaders
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
train_loader = DataLoader(dataset["train"], batch_size=16, shuffle=True, collate_fn=data_collator)
val_loader = DataLoader(dataset["test"], batch_size=16, shuffle=False, collate_fn=data_collator)

# STEP 10: Load Pretrained BERT for Binary Classification
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
model = model.to(device)

# STEP 11: Define Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

# STEP 12: Training Loop
num_epochs = 2
model.train()

for epoch in range(num_epochs):
    total_loss = 0
    print(f"\nEpoch {epoch + 1} / {num_epochs}")
    
    for batch in tqdm(train_loader):
        # Move batch to the selected device (CPU/GPU)
        batch = {k: v.to(device) for k, v in batch.items()}

        # Forward pass
        optimizer.zero_grad()
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()

        # Backward pass and update
        loss.backward()
        optimizer.step()

    # Print average loss for the epoch
    print(f"Epoch {epoch + 1} finished. Average Loss: {total_loss / len(train_loader):.4f}")

    # ✅ Optional: Save model after each epoch
    model.save_pretrained(f"./bert-amazon-epoch{epoch+1}")
    tokenizer.save_pretrained(f"./bert-amazon-epoch{epoch+1}")


Using device: cuda


Map: 100%|██████████| 50000/50000 [00:58<00:00, 859.98 examples/s]
Map: 100%|██████████| 10000/10000 [00:11<00:00, 860.44 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1 / 2


100%|██████████| 3125/3125 [06:53<00:00,  7.55it/s]


Epoch 1 finished. Average Loss: 0.2152

Epoch 2 / 2


100%|██████████| 3125/3125 [07:00<00:00,  7.43it/s]


Epoch 2 finished. Average Loss: 0.1133


In [16]:
# STEP 1: Import Libraries
import torch
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    DataCollatorWithPadding
)
from torch.optim import AdamW
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm

# STEP 2: Set Device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# STEP 3: Load the Amazon Polarity Dataset
dataset = load_dataset("amazon_polarity")

# ✅ STEP 4: Subset the Dataset for Faster Training (50K train, 10K test)
dataset["train"] = dataset["train"].select(range(500000))
dataset["test"] = dataset["test"].select(range(40000))

# STEP 5: Rename 'label' to 'labels' to work with Transformers
dataset = dataset.rename_column("label", "labels")

# STEP 6: Load the BERT tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# ✅ STEP 7: Tokenization Function (max_length=128 for speed)
def tokenize_function(batch):
    return tokenizer(batch["content"], padding="max_length", truncation=True, max_length=128)

# Apply tokenizer to the dataset
dataset = dataset.map(tokenize_function, batched=True)

# STEP 8: Set format for PyTorch (keep only required columns)
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# STEP 9: Create DataLoaders
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
train_loader = DataLoader(dataset["train"], batch_size=16, shuffle=True, collate_fn=data_collator)
val_loader = DataLoader(dataset["test"], batch_size=16, shuffle=False, collate_fn=data_collator)

# STEP 10: Load Pretrained BERT for Binary Classification
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
model = model.to(device)

# STEP 11: Define Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

# STEP 12: Training Loop
num_epochs = 2
model.train()

for epoch in range(num_epochs):
    total_loss = 0
    print(f"\nEpoch {epoch + 1} / {num_epochs}")
    
    for batch in tqdm(train_loader):
        batch = {k: v.to(device) for k, v in batch.items()}

        optimizer.zero_grad()
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1} finished. Average Loss: {total_loss / len(train_loader):.4f}")

    # ✅ Optional: Save model after each epoch
    model.save_pretrained(f"./bert-amazon-epoch{epoch+1}")
    tokenizer.save_pretrained(f"./bert-amazon-epoch{epoch+1}")

# STEP 13: Evaluation
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for batch in tqdm(val_loader, desc="Evaluating"):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        predictions = torch.argmax(outputs.logits, dim=-1)
        correct += (predictions == batch["labels"]).sum().item()
        total += batch["labels"].size(0)

accuracy = correct / total
print(f"\n✅ Evaluation Accuracy: {accuracy:.4f}")


Using device: cuda


Using the latest cached version of the dataset since amazon_polarity couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'amazon_polarity' at /root/.cache/huggingface/datasets/amazon_polarity/amazon_polarity/0.0.0/9d9c45c18f8c3cf1b23a3c27917b60cbf28f3289 (last modified on Mon Apr 14 09:03:07 2025).
Map: 100%|██████████| 500000/500000 [09:30<00:00, 877.15 examples/s]
Map: 100%|██████████| 40000/40000 [00:44<00:00, 889.09 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1 / 2


100%|██████████| 31250/31250 [1:09:44<00:00,  7.47it/s]


Epoch 1 finished. Average Loss: 0.1619

Epoch 2 / 2


100%|██████████| 31250/31250 [1:09:46<00:00,  7.46it/s]


Epoch 2 finished. Average Loss: 0.1088


Evaluating: 100%|██████████| 2500/2500 [01:39<00:00, 25.00it/s]


✅ Evaluation Accuracy: 0.9495





In [17]:
# STEP 1: Import Libraries
import torch
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    DataCollatorWithPadding
)
from torch.optim import AdamW
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm

# STEP 2: Set Device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# STEP 3: Load the Amazon Polarity Dataset
dataset = load_dataset("amazon_polarity")

# ✅ STEP 4: Subset the Dataset for Faster Training (50K train, 10K test)
dataset["train"] = dataset["train"].select(range(1000000))
dataset["test"] = dataset["test"].select(range(100000))

# STEP 5: Rename 'label' to 'labels' to work with Transformers
dataset = dataset.rename_column("label", "labels")

# STEP 6: Load the BERT tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# ✅ STEP 7: Tokenization Function (max_length=128 for speed)
def tokenize_function(batch):
    return tokenizer(batch["content"], padding="max_length", truncation=True, max_length=128)

# Apply tokenizer to the dataset
dataset = dataset.map(tokenize_function, batched=True)

# STEP 8: Set format for PyTorch (keep only required columns)
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# STEP 9: Create DataLoaders
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
train_loader = DataLoader(dataset["train"], batch_size=16, shuffle=True, collate_fn=data_collator)
val_loader = DataLoader(dataset["test"], batch_size=16, shuffle=False, collate_fn=data_collator)

# STEP 10: Load Pretrained BERT for Binary Classification
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
model = model.to(device)

# STEP 11: Define Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

# STEP 12: Training Loop
num_epochs = 2
model.train()

for epoch in range(num_epochs):
    total_loss = 0
    print(f"\nEpoch {epoch + 1} / {num_epochs}")
    
    for batch in tqdm(train_loader):
        batch = {k: v.to(device) for k, v in batch.items()}

        optimizer.zero_grad()
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1} finished. Average Loss: {total_loss / len(train_loader):.4f}")

    # ✅ Optional: Save model after each epoch
    model.save_pretrained(f"./bert-amazon-epoch{epoch+1}")
    tokenizer.save_pretrained(f"./bert-amazon-epoch{epoch+1}")

# STEP 13: Evaluation
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for batch in tqdm(val_loader, desc="Evaluating"):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        predictions = torch.argmax(outputs.logits, dim=-1)
        correct += (predictions == batch["labels"]).sum().item()
        total += batch["labels"].size(0)

accuracy = correct / total
print(f"\n✅ Evaluation Accuracy: {accuracy:.4f}")


Using device: cuda


Map: 100%|██████████| 1000000/1000000 [18:52<00:00, 882.98 examples/s]
Map: 100%|██████████| 100000/100000 [01:53<00:00, 881.09 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1 / 2


100%|██████████| 62500/62500 [2:19:28<00:00,  7.47it/s]  


Epoch 1 finished. Average Loss: 0.1506

Epoch 2 / 2


100%|██████████| 62500/62500 [2:19:30<00:00,  7.47it/s]  


Epoch 2 finished. Average Loss: 0.1098


Evaluating: 100%|██████████| 6250/6250 [04:10<00:00, 24.97it/s]


✅ Evaluation Accuracy: 0.9509





In [1]:
#Import Libraries
import torch
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    DataCollatorWithPadding
)
from torch.optim import AdamW
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm

#Set GPU 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Load the Amazon Polarity Dataset
dataset = load_dataset("amazon_polarity")

# Subset the Dataset for Faster Training (50K train, 10K test)
dataset["train"] = dataset["train"].select(range(1000000))
dataset["test"] = dataset["test"].select(range(100000))

#Rename 'label' to 'labels' to work with Transformers
dataset = dataset.rename_column("label", "labels")

#Load the BERT tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)

#Tokenization Function (max_length=128 for speed)
def tokenize_function(batch):
    return tokenizer(batch["content"], padding="max_length", truncation=True, max_length=128)

# Apply tokenizer to the dataset
dataset = dataset.map(tokenize_function, batched=True)

#  Set format for PyTorch (keep only required columns)
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# Create DataLoaders
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
train_loader = DataLoader(dataset["train"], batch_size=16, shuffle=True, collate_fn=data_collator)
val_loader = DataLoader(dataset["test"], batch_size=16, shuffle=False, collate_fn=data_collator)

# Load Pretrained BERT for Binary Classification
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
model = model.to(device)

# Define Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

# Training Loop
num_epochs = 2
model.train()

for epoch in range(num_epochs):
    total_loss = 0
    print(f"\nEpoch {epoch + 1} / {num_epochs}")
    
    for batch in tqdm(train_loader):
        batch = {k: v.to(device) for k, v in batch.items()}

        optimizer.zero_grad()
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1} finished. Average Loss: {total_loss / len(train_loader):.4f}")

    #Save model after each epoch
    model.save_pretrained(f"./bert-amazon-epoch{epoch+1}")
    tokenizer.save_pretrained(f"./bert-amazon-epoch{epoch+1}")

# Evaluation
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for batch in tqdm(val_loader, desc="Evaluating"):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        predictions = torch.argmax(outputs.logits, dim=-1)
        correct += (predictions == batch["labels"]).sum().item()
        total += batch["labels"].size(0)

accuracy = correct / total
print(f"\n Evaluation Accuracy: {accuracy:.4f}")


  from .autonotebook import tqdm as notebook_tqdm
2025-04-15 07:31:07.463760: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744695067.482749    7281 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744695067.488513    7281 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744695067.504049    7281 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1744695067.504066    7281 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1744695067.504068    7281

Using device: cuda


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1 / 2


100%|██████████| 62500/62500 [2:19:19<00:00,  7.48it/s]  


Epoch 1 finished. Average Loss: 0.1504

Epoch 2 / 2


100%|██████████| 62500/62500 [2:19:34<00:00,  7.46it/s]  


Epoch 2 finished. Average Loss: 0.1097


Evaluating: 100%|██████████| 6250/6250 [04:09<00:00, 25.05it/s]


 Evaluation Accuracy: 0.9516



