### Demo of Focal Cross-Entropy Loss Functions in PyTorch
This module provides implementations of focal cross-entropy loss functions for both binary and multi-class classification tasks in PyTorch. The focal loss is designed to address class imbalance by down-weighting easy examples and focusing more on hard, misclassified examples.
The `FocalCrossEntropyLoss` class extends the functionality of a standard cross-entropy loss by incorporating a focusing parameter (`gamma`) and an optional class balancing factor (`alpha`). It also includes support for label smoothing, which can help improve model generalization in multi-class classification scenarios.
The `BinaryFocalCrossEntropyLoss` class is a specific implementation of the focal loss for binary classification tasks, while the `FocalCrossEntropyLoss` class is more general and can be used for multi-class classification tasks. Both classes allow for flexible configuration of the loss function to suit different training needs and scenarios.


In [1]:
# Install software dependencies for the focal cross-entropy loss functions demo notebook
%pip install ipykernel requests matplotlib --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m27.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h

In [None]:
# Imports
import os
import sys
import requests
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# import the focal loss functions from the local file
from focal_ce_loss import FocalCELoss
# Set device: use the CUDA GPU when PyTorch reports one as available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")




SyntaxError: invalid syntax (3013583679.py, line 11)

In [None]:
# Grab the COCO dataset for the demo notebook.
# Download and extract the annotations and images for the COCO 2017 dataset.
# NOTE: the image archives are very large, so they are streamed to disk in
# chunks instead of being held in memory.
import zipfile

dl_urls = ["http://images.cocodataset.org/annotations/annotations_trainval2017.zip",
           "http://images.cocodataset.org/zips/train2017.zip",
           "http://images.cocodataset.org/zips/val2017.zip",
           "http://images.cocodataset.org/zips/test2017.zip"]

# Create the data/coco directory once, up front, if it doesn't exist
coco_dir = "data/coco"
os.makedirs(coco_dir, exist_ok=True)

for url in dl_urls:
    filename = url.split("/")[-1]
    # Directory the archive unpacks to, derived from the file name
    # (e.g. "train2017.zip" -> "train2017", "annotations_trainval2017.zip" -> "annotations")
    extracted_name = filename.split(".")[0].split("_")[0]
    # If the extracted directory already exists, skip downloading
    if os.path.exists(os.path.join(coco_dir, extracted_name)):
        print(f"{extracted_name} already exists, skipping download.")
        continue

    zip_path = os.path.join(coco_dir, filename)
    if not os.path.exists(zip_path):
        # Write to a temporary name first so an interrupted download is never
        # mistaken for a complete archive on the next run.
        partial_path = zip_path + ".part"
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()  # fail loudly instead of saving an error page as a .zip
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial_path, zip_path)

    # Extract so the dataset paths used later (data/coco/train2017, data/coco/annotations, ...) exist
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(coco_dir)

In [None]:
# Create a simple cnn pytorch model to demo and compare the focal loss functions against other inbuilt loss functions
class SimpleCNN(nn.Module):
    """Small three-stage convolutional classifier.

    Each stage is a 3x3 convolution (padding 1) followed by ReLU and a 2x2
    max-pool, so the spatial size is halved three times before two fully
    connected layers produce the class logits.

    Args:
        input_channels: Number of channels in the input images.
        num_classes: Number of output logits.
        input_size: Height/width of the (square) input images. Defaults to 32,
            which gives the original 64 * 4 * 4 = 1024 flattened features.

    Raises:
        ValueError: If ``input_size`` is too small to survive three poolings.
    """

    def __init__(self, input_channels=3, num_classes=10, input_size=32):
        super().__init__()
        # Three stride-2 poolings with floor rounding reduce the side length to input_size // 8.
        pooled_size = input_size // 8
        if pooled_size < 1:
            raise ValueError(f"input_size must be at least 8, got {input_size}")
        self.conv1 = nn.Conv2d(input_channels, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * pooled_size * pooled_size, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images shaped (N, C, H, W)."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, 1024),
        # this never folds extra features into the batch dimension, so a
        # wrongly sized input fails at fc1 instead of silently changing the batch size.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [None]:
# Define transformations for the COCO dataset
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Resize images to 32x32
    transforms.ToTensor(),        # Convert images to PyTorch tensors
])


def coco_collate(batch):
    """Batch (image, annotations) samples from CocoDetection.

    Each image carries a different number of annotation dicts, which the
    default collate function cannot batch. The images are stacked into one
    tensor and the annotations are kept as a plain list with one list of
    dicts per image. Images without any annotation are dropped because they
    provide no label to train on.

    NOTE: a batch in which no image has annotations is not handled and makes
    ``torch.stack`` raise.
    """
    labelled = [(image, anns) for image, anns in batch if anns]
    images = torch.stack([image for image, _ in labelled])
    annotations = [anns for _, anns in labelled]
    return images, annotations


# Load the COCO dataset using torchvision
train_dataset = datasets.CocoDetection(root="data/coco/train2017",
                                       annFile="data/coco/annotations/instances_train2017.json",
                                       transform=transform)
val_dataset = datasets.CocoDetection(root="data/coco/val2017",
                                     annFile="data/coco/annotations/instances_val2017.json",
                                     transform=transform)

# Create data loaders for training and validation
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=coco_collate)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=coco_collate)

print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")

In [None]:
# Display 10 random sample images and their annotations from the training dataset
num_samples = 10
# Draw distinct random indices so every run shows a different selection
# (previously the first 10 images were always shown).
sample_indices = torch.randperm(len(train_dataset))[:num_samples].tolist()
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
for ax in axes.flat:
    ax.axis("off")  # also hides any unused panel when the dataset has fewer than 10 images
for ax, idx in zip(axes.flat, sample_indices):
    img, annotations = train_dataset[idx]
    ax.imshow(img.permute(1, 2, 0))  # Convert from (C, H, W) to (H, W, C)
    ax.set_title(f"Annotations: {len(annotations)}")
plt.tight_layout()
plt.show()


In [None]:
# Define a function to train the model and save the output metrics for comparison
def _prepare_batch(images, annotations, device):
    """Turn one loader batch into (images, labels) tensors on ``device``.

    The label of an image is the ``category_id`` of its first annotation.
    Images with no annotations are dropped. Returns ``(None, None)`` when no
    image in the batch has an annotation.

    Assumes ``images`` is a batched tensor and ``annotations`` is a sequence
    holding one list of annotation dicts per image.
    """
    keep = [i for i, anns in enumerate(annotations) if len(anns) > 0]
    if not keep:
        return None, None
    labels = torch.tensor([annotations[i][0]['category_id'] for i in keep],
                          dtype=torch.long, device=device)
    if len(keep) != len(annotations):
        images = images[keep]
    return images.to(device), labels


def _mean_or_nan(total, count):
    """Return total / count, or NaN when nothing was accumulated."""
    return total / count if count else float("nan")


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5):
    """Train ``model`` and record the mean loss per epoch.

    Args:
        model: Network to train. Batches are moved to the device of its parameters.
        train_loader: Iterable of ``(images, annotations)`` training batches.
        val_loader: Iterable of ``(images, annotations)`` validation batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        Tuple ``(train_losses, val_losses)`` with one mean batch loss per epoch.
        An epoch that processed no batch is recorded as NaN.
    """
    # Run on whatever device the model lives on, so CPU and GPU models both work.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    train_losses = []
    val_losses = []
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        train_batches = 0
        for images, annotations in train_loader:
            images, labels = _prepare_batch(images, annotations, model_device)
            if images is None:
                continue  # nothing to learn from in this batch
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            train_batches += 1
        train_losses.append(_mean_or_nan(running_loss, train_batches))

        model.eval()
        val_loss = 0.0
        val_batches = 0
        with torch.no_grad():
            for images, annotations in val_loader:
                images, labels = _prepare_batch(images, annotations, model_device)
                if images is None:
                    continue
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                val_batches += 1
        val_losses.append(_mean_or_nan(val_loss, val_batches))
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Val Loss: {val_losses[-1]:.4f}")
    return train_losses, val_losses

In [None]:
# Create the set of criteria to compare the focal loss function against.
# COCO category ids run from 1 to 90, so the classifier needs 91 output logits
# for every id to be a valid class index.
NUM_CLASSES = 91


class ProbabilityRegressionLoss(nn.Module):
    """Adapt a regression loss (MSE, L1) to (logits, class-index) inputs.

    The logits are converted to probabilities with softmax and compared with
    the one-hot encoding of the labels, so the wrapped loss receives two float
    tensors of identical shape.
    """

    def __init__(self, base_loss):
        super().__init__()
        self.base_loss = base_loss

    def forward(self, logits, labels):
        probabilities = torch.softmax(logits, dim=1)
        one_hot = nn.functional.one_hot(labels, num_classes=logits.shape[1]).to(probabilities.dtype)
        return self.base_loss(probabilities, one_hot)


class LogitsNLLLoss(nn.Module):
    """Negative log-likelihood on raw logits (NLLLoss expects log-probabilities)."""

    def __init__(self):
        super().__init__()
        self.nll = nn.NLLLoss()

    def forward(self, logits, labels):
        return self.nll(torch.log_softmax(logits, dim=1), labels)


criteria = {
    "mse_loss": ProbabilityRegressionLoss(nn.MSELoss()),
    "L1_loss": ProbabilityRegressionLoss(nn.L1Loss()),
    "cross_entropy_loss": nn.CrossEntropyLoss(),
    "nll_loss": LogitsNLLLoss(),
    # NOTE(review): FocalCELoss is used with its defaults; confirm it accepts
    # raw logits and integer class labels like nn.CrossEntropyLoss does.
    "focal_ce_loss": FocalCELoss(),
}
# Train the model with each criterion and store the losses for comparison
results = {}
for name, criterion in criteria.items():
    print(f"Training with {name}...")
    # A fresh model and optimizer per criterion so the runs do not influence each other
    model = SimpleCNN(num_classes=NUM_CLASSES).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
    train_losses, val_losses = train_model(model, train_loader, val_loader, criterion, optimizer)
    results[name] = {"train_losses": train_losses, "val_losses": val_losses}

In [None]:
# Draw one training curve and one validation curve per criterion on a shared figure
plt.figure(figsize=(12, 6))
for name, result in results.items():
    # Training curve first, then validation, so the legend order stays the same
    for series_key, series_tag in (("train_losses", "Train"), ("val_losses", "Val")):
        plt.plot(result[series_key], label=f"{name} {series_tag} Loss")
# Axis labelling and decorations
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training and Validation Loss Comparison")
plt.grid()
plt.legend()
plt.show()