**Name**: Lamine Deen  
**Purpose**: Demo of applying a custom entropy loss to dense and convolutional layers

In [18]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import time
import pandas as pd
from custom_entropy import LayerFinder, CustomEntropyLoss # the classes that do the magic


# Data preprocessing: convert images to tensors, then normalize every RGB
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Load the CIFAR-10 training split (downloaded into ./data when missing)
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

# Load the CIFAR-10 test split; order is kept fixed (no shuffling)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False)

# Load a pre-trained model (ResNet18).
# The `weights=` enum replaces the deprecated `pretrained=True` flag
# (torchvision >= 0.13) and selects the same IMAGENET1K_V1 checkpoint.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, 10)  # Modify the output layer for CIFAR-10 (10 classes)

# Print layers and their types so the layer indices below can be chosen
finder = LayerFinder(model)
finder.print_layers()

# Layer indices and the lambda to apply to each corresponding layer
layers_indices = [0, 9]
lamdas = [0.1, 0.001]  # NOTE: name (sic) is referenced by the comparison cell further down

# Pick the compute device: Apple MPS first, then CUDA, otherwise CPU
if torch.backends.mps.is_available():
    device = torch.device("mps")  # Use MPS for Apple Silicon
elif torch.cuda.is_available():
    device = torch.device("cuda")  # Use CUDA for NVIDIA GPUs
else:
    device = torch.device("cpu")  # Use CPU if no GPU is available


print(device)
model = model.to(device) # move model to device


Files already downloaded and verified
Files already downloaded and verified
Layers in the model:
Layer 0: conv1 -> Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
Layer 1: bn1 -> BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
Layer 2: relu -> ReLU(inplace=True)
Layer 3: maxpool -> MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
Layer 4: layer1 -> Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d

In [19]:
# Function to train the model
def train_model(model, criterion, optimizer, num_epochs=10, custom_loss=False,
                loader=None, target_device=None):
    """Train ``model`` and report timing plus the best per-epoch accuracy.

    Note that the accuracy is measured on the same batches used for training
    (predictions are taken from the training forward pass), not on a
    held-out set.

    Args:
        model: Network to train; it is called as ``model(inputs)``.
        criterion: Loss callable. Called as ``criterion(outputs, labels)``, or
            as ``criterion(model, outputs, labels)`` when ``custom_loss`` is True.
        optimizer: Optimizer; ``zero_grad()`` and ``step()`` are called once
            per batch.
        num_epochs: Number of passes over the batches.
        custom_loss: Selects the three-argument criterion call described above.
        loader: Iterable yielding ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``trainloader``.
        target_device: Device the batches are moved to. Defaults to the
            notebook-level ``device``.

    Returns:
        Tuple ``(time_elapsed, num_epochs_used, best_acc)``:
        wall-clock seconds spent, the 1-based epoch at which the best accuracy
        was first reached (0 if no epoch exceeded 0.0), and the best accuracy
        as a Python float.
    """
    # Resolve the notebook globals lazily so explicit arguments never need them.
    batches = trainloader if loader is None else loader
    run_device = device if target_device is None else target_device

    since = time.time()

    best_acc = 0.0
    num_epochs_used = 0

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode

        running_loss = 0.0
        correct_preds = 0
        total_preds = 0

        for inputs, labels in batches:
            inputs = inputs.to(run_device)
            labels = labels.to(run_device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            outputs = model(inputs)
            if custom_loss:
                loss = criterion(model, outputs, labels)  # Custom entropy loss
            else:
                loss = criterion(outputs, labels)  # Standard CrossEntropyLoss

            _, preds = torch.max(outputs, 1)
            loss.backward()
            optimizer.step()

            # Statistics are kept as plain Python numbers so the epoch summary
            # and the return value never depend on a tensor being present.
            batch_size = inputs.size(0)
            running_loss += loss.item() * batch_size
            correct_preds += (preds == labels).sum().item()
            total_preds += labels.size(0)

        # Calculate accuracy and loss for this epoch; an empty loader yields 0.0
        # instead of a division by zero.
        epoch_acc = correct_preds / total_preds if total_preds else 0.0
        epoch_loss = running_loss / total_preds if total_preds else 0.0

        # Save best accuracy and epoch if improvement
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            num_epochs_used = epoch + 1

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

    time_elapsed = time.time() - since
    return time_elapsed, num_epochs_used, best_acc

# Build one fresh network per configuration so both runs start from the same recipe
def _build_resnet18_classifier(num_classes=10):
    """Return an ImageNet-pretrained ResNet18 with a new ``num_classes``-way head, moved to ``device``."""
    # `weights=` replaces the deprecated `pretrained=True` flag (torchvision >= 0.13)
    # and selects the same IMAGENET1K_V1 checkpoint.
    net = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    return net.to(device)


# Define comparison test between two configurations
def compare_losses(layers_indices, lambdas, num_epochs=10):
    """Train ResNet18 with plain CrossEntropyLoss and with CustomEntropyLoss, then tabulate both runs.

    Each configuration gets its own freshly built model and its own SGD
    optimizer (lr=0.001, momentum=0.9) and is trained via ``train_model``.

    Args:
        layers_indices: Layer indices forwarded to
            ``CustomEntropyLoss(layers_indices, lambdas)``.
        lambdas: Lambda values forwarded to
            ``CustomEntropyLoss(layers_indices, lambdas)``.
        num_epochs: Number of epochs for each configuration.

    Returns:
        pandas.DataFrame with one row per configuration and the columns
        "Configuration", "Time (s)", "Epochs" and "Accuracy". The same table
        is also printed. The values come straight from ``train_model``.
    """
    # (row label, banner printed before training, criterion factory, custom_loss flag)
    configurations = [
        (
            "CrossEntropyLoss",
            "Training with CrossEntropyLoss...",
            nn.CrossEntropyLoss,
            False,
        ),
        (
            "CrossEntropyLoss + CustomEntropyLoss",
            "\nTraining with CrossEntropyLoss + CustomEntropyLoss...",
            lambda: CustomEntropyLoss(layers_indices, lambdas),
            True,
        ),
    ]

    results = []
    for label, banner, make_criterion, uses_custom_loss in configurations:
        net = _build_resnet18_classifier()
        criterion = make_criterion()
        optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

        print(banner)
        elapsed, epochs_used, best_acc = train_model(
            net, criterion, optimizer, num_epochs=num_epochs, custom_loss=uses_custom_loss
        )
        results.append({"Configuration": label, "Time (s)": elapsed, "Epochs": epochs_used, "Accuracy": best_acc})

    # Display the results as a table, and hand them back for further analysis
    results_df = pd.DataFrame(results)
    print(results_df)
    return results_df


Run the comparison test using both configurations.

In [20]:
# Train both configurations with the layer indices and lambdas chosen in the setup cell
compare_losses(layers_indices=layers_indices, lambdas=lamdas)

Training with CrossEntropyLoss...
Epoch 1/10, Loss: 1.1565, Accuracy: 0.5953
Epoch 2/10, Loss: 0.6909, Accuracy: 0.7613
Epoch 3/10, Loss: 0.5225, Accuracy: 0.8187
Epoch 4/10, Loss: 0.3958, Accuracy: 0.8629
Epoch 5/10, Loss: 0.2986, Accuracy: 0.8968
Epoch 6/10, Loss: 0.2174, Accuracy: 0.9260
Epoch 7/10, Loss: 0.1564, Accuracy: 0.9481
Epoch 8/10, Loss: 0.1086, Accuracy: 0.9653
Epoch 9/10, Loss: 0.0791, Accuracy: 0.9752
Epoch 10/10, Loss: 0.0611, Accuracy: 0.9806

Training with CrossEntropyLoss + CustomEntropyLoss...
Epoch 1/10, Loss: 1.2204, Accuracy: 0.5934
Epoch 2/10, Loss: 0.6890, Accuracy: 0.7561
Epoch 3/10, Loss: 0.4945, Accuracy: 0.8181
Epoch 4/10, Loss: 0.3571, Accuracy: 0.8583
Epoch 5/10, Loss: 0.2416, Accuracy: 0.8945
Epoch 6/10, Loss: 0.1510, Accuracy: 0.9239
Epoch 7/10, Loss: 0.0758, Accuracy: 0.9478
Epoch 8/10, Loss: 0.0278, Accuracy: 0.9623
Epoch 9/10, Loss: -0.0119, Accuracy: 0.9739
Epoch 10/10, Loss: -0.0354, Accuracy: 0.9801
                          Configuration    Time