In [2]:
import torch
torch.cuda.is_available()


True

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [10]:
import torch  # PyTorch for tensor computations and GPU support
from transformers import BertTokenizer, BertForSequenceClassification  # Hugging Face tools for BERT
from torch.utils.data import DataLoader, TensorDataset, random_split  # Data handling utilities
from torch.optim import AdamW  # Optimizer for training the model
import pandas as pd  # For loading and handling tabular text data

#GPU/CPU Check 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available, else CPU
print("Using device:", device)

#Combine Datasets and load them
file_path_1 = 'amazon_cells_labelled (1).txt'  # First text dataset (small)
file_path_2 = 'amazon_cells_labelled_LARGE_25K.txt'  # Second text dataset (larger)

# Function to load tab-separated text data into a DataFrame
def load_data(file_path):
    return pd.read_csv(file_path, delimiter='\t', header=None, names=["text", "label"])

# Load both datasets
df1 = load_data(file_path_1)
df2 = load_data(file_path_2)

# Combine 2 datasets into one Frame
df = pd.concat([df1, df2]).reset_index(drop=True)

# Tokenize Text
model_name = 'bert-base-uncased'  # Base BERT model (lowercase English)
tokenizer = BertTokenizer.from_pretrained(model_name)  # Load tokenizer for BERT

# Tokenize  text column into input IDs and attention masks
inputs = tokenizer(df['text'].tolist(), padding=True, truncation=True, return_tensors="pt")

# Convert labels into PyTorch tensor
labels = torch.tensor(df['label'].tolist())

# Create Tensor Dataset and Split into Train/Test
# Combine input_ids, attention_mask, and labels into one dataset
dataset = TensorDataset(inputs['input_ids'], inputs['attention_mask'], labels)

#split the dataset 80% for training and 20% for validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Random split the full dataset
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create DataLoaders for training and validation datasets
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)  # Shuffle during training
val_loader = DataLoader(val_dataset, batch_size=16)  # No shuffle needed for validation

# Load Model and Send to GPU
# Load a BERT model for binary classification (positive/negative)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Move the model to the appropriate device (GPU)
model = model.to(device)

#  Optimizer: Use AdamW optimizer 
optimizer = AdamW(model.parameters(), lr=1e-5)

from tqdm import tqdm  # tqdm gives a nice progress bar for training steps

# Training Loop
num_epochs = 3  
model.train()  # Set model to training mode

# Loop over each epoch
for epoch in range(num_epochs):
    total_loss = 0  # Track loss for the epoch
    print(f"\nEpoch {epoch + 1} / {num_epochs}")
    
    # Loop over training batches
    for step, batch in enumerate(tqdm(train_loader)):  # Show progress with tqdm
        # Unpack the batch and move inputs to the selected device
        input_ids, attention_mask, labels = [b.to(device) for b in batch]

        optimizer.zero_grad()  # Reset gradients to zero before backward pass

        # Forward pass: compute model predictions and loss
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss  # Extract the loss value from the model output
        total_loss += loss.item()  # Accumulate loss

        loss.backward()  # Backpropagation: compute gradients
        optimizer.step()  # Update model weights

        # Every 100 steps, print the current loss
        if (step + 1) % 100 == 0:
            print(f"  Step {step+1}/{len(train_loader)} - Loss: {loss.item():.4f}")

    # Print average loss at the end of the epoch
    print(f"\nEpoch {epoch+1} finished. Average Loss: {total_loss / len(train_loader):.4f}")


Using device: cuda


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1 / 3


  8%|▊         | 102/1300 [00:11<02:07,  9.38it/s]

  Step 100/1300 - Loss: 0.3971


 16%|█▌        | 202/1300 [00:21<01:57,  9.36it/s]

  Step 200/1300 - Loss: 0.2545


 23%|██▎       | 302/1300 [00:33<01:47,  9.29it/s]

  Step 300/1300 - Loss: 0.2471


 31%|███       | 402/1300 [00:44<01:36,  9.28it/s]

  Step 400/1300 - Loss: 0.3701


 39%|███▊      | 502/1300 [00:55<01:26,  9.23it/s]

  Step 500/1300 - Loss: 0.2466


 46%|████▋     | 602/1300 [01:06<01:15,  9.22it/s]

  Step 600/1300 - Loss: 0.1859


 54%|█████▍    | 702/1300 [01:17<01:04,  9.21it/s]

  Step 700/1300 - Loss: 0.1406


 62%|██████▏   | 802/1300 [01:28<00:54,  9.18it/s]

  Step 800/1300 - Loss: 0.3449


 69%|██████▉   | 902/1300 [01:39<00:43,  9.14it/s]

  Step 900/1300 - Loss: 0.4618


 77%|███████▋  | 1002/1300 [01:51<00:32,  9.13it/s]

  Step 1000/1300 - Loss: 0.6895


 85%|████████▍ | 1102/1300 [02:02<00:22,  8.77it/s]

  Step 1100/1300 - Loss: 0.2058


 92%|█████████▏| 1202/1300 [02:13<00:10,  9.15it/s]

  Step 1200/1300 - Loss: 0.0303


100%|██████████| 1300/1300 [02:24<00:00,  9.00it/s]


  Step 1300/1300 - Loss: 0.1555

Epoch 1 finished. Average Loss: 0.2325

Epoch 2 / 3


  8%|▊         | 102/1300 [00:11<02:10,  9.17it/s]

  Step 100/1300 - Loss: 0.1551


 16%|█▌        | 202/1300 [00:22<02:00,  9.14it/s]

  Step 200/1300 - Loss: 0.0059


 23%|██▎       | 302/1300 [00:33<01:48,  9.18it/s]

  Step 300/1300 - Loss: 0.0099


 31%|███       | 402/1300 [00:45<01:38,  9.12it/s]

  Step 400/1300 - Loss: 0.2675


 39%|███▊      | 502/1300 [00:56<01:27,  9.16it/s]

  Step 500/1300 - Loss: 0.1518


 46%|████▋     | 602/1300 [01:07<01:16,  9.17it/s]

  Step 600/1300 - Loss: 0.1844


 54%|█████▍    | 702/1300 [01:18<01:06,  9.02it/s]

  Step 700/1300 - Loss: 0.1454


 62%|██████▏   | 802/1300 [01:29<00:54,  9.15it/s]

  Step 800/1300 - Loss: 0.1246


 69%|██████▉   | 902/1300 [01:41<00:43,  9.16it/s]

  Step 900/1300 - Loss: 0.0979


 77%|███████▋  | 1002/1300 [01:52<00:32,  9.15it/s]

  Step 1000/1300 - Loss: 0.2275


 85%|████████▍ | 1102/1300 [02:03<00:21,  9.14it/s]

  Step 1100/1300 - Loss: 0.0381


 92%|█████████▏| 1202/1300 [02:14<00:10,  9.12it/s]

  Step 1200/1300 - Loss: 0.3773


100%|██████████| 1300/1300 [02:25<00:00,  8.91it/s]


  Step 1300/1300 - Loss: 0.1269

Epoch 2 finished. Average Loss: 0.1371

Epoch 3 / 3


  8%|▊         | 102/1300 [00:11<02:11,  9.14it/s]

  Step 100/1300 - Loss: 0.0093


 16%|█▌        | 202/1300 [00:22<01:59,  9.17it/s]

  Step 200/1300 - Loss: 0.0207


 23%|██▎       | 302/1300 [00:33<01:49,  9.14it/s]

  Step 300/1300 - Loss: 0.0203


 31%|███       | 402/1300 [00:45<01:39,  9.00it/s]

  Step 400/1300 - Loss: 0.0333


 39%|███▊      | 502/1300 [00:56<01:27,  9.16it/s]

  Step 500/1300 - Loss: 0.3097


 46%|████▋     | 602/1300 [01:07<01:16,  9.16it/s]

  Step 600/1300 - Loss: 0.0722


 54%|█████▍    | 702/1300 [01:18<01:05,  9.12it/s]

  Step 700/1300 - Loss: 0.1350


 62%|██████▏   | 802/1300 [01:29<00:54,  9.14it/s]

  Step 800/1300 - Loss: 0.0864


 69%|██████▉   | 902/1300 [01:41<00:43,  9.13it/s]

  Step 900/1300 - Loss: 0.0885


 77%|███████▋  | 1002/1300 [01:52<00:32,  9.16it/s]

  Step 1000/1300 - Loss: 0.2055


 85%|████████▍ | 1102/1300 [02:03<00:21,  9.13it/s]

  Step 1100/1300 - Loss: 0.0964


 92%|█████████▏| 1202/1300 [02:14<00:10,  9.16it/s]

  Step 1200/1300 - Loss: 0.0278


100%|██████████| 1300/1300 [02:25<00:00,  8.91it/s]

  Step 1300/1300 - Loss: 0.0874

Epoch 3 finished. Average Loss: 0.0852





In [11]:
#Validation Loop
model.eval()  # Set the model to evaluation mode

# Track metrics like accuracy
correct = 0
total = 0

with torch.no_grad():  # No gradients needed for validation
    for batch in val_loader:
        input_ids, attention_mask, labels = [b.to(device) for b in batch]
        
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)

        correct += (predictions == labels).sum().item()
        total += labels.size(0)

accuracy = correct / total
print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9365
