In [1]:
# Upgrade pip
!pip install --upgrade pip
# Install PyTorch with MPS support
!pip install torch torchvision
# Install thops for metric analysis
!pip install thop

[0m

In [2]:
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models

import utils.dependencies as utils
import utils.metrics as metrics

In [3]:
# Select the compute device in priority order: Apple MPS, then CUDA, then CPU.
# Each availability check is only called if no earlier backend was usable.
device_candidates = (
    ("mps", "Using Apple Silicon GPU (MPS)", torch.backends.mps.is_available),
    ("cuda", "Using CUDA GPU", torch.cuda.is_available),
    ("cpu", "Using CPU", lambda: True),  # CPU is the unconditional fallback
)
for backend_name, banner, is_usable in device_candidates:
    if is_usable():
        device = torch.device(backend_name)
        print(banner)
        break

Using CUDA GPU


In [4]:
# Build the data loaders with the project helper, then split the result into
# its train / validation / test parts (the helper is expected to yield exactly three).
data_loaders = utils.get_data_loaders()
train_loader, val_loader, test_loader = data_loaders

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Load EfficientNet-B0 initialised with the ImageNet-1k (V1) pre-trained weights
pretrained_weights = models.EfficientNet_B0_Weights.IMAGENET1K_V1
model = models.efficientnet_b0(weights=pretrained_weights)

# Display the architecture as loaded (the classifier head has not been modified yet)
print(model)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [6]:
# Keep the pre-trained ImageNet feature extractor and swap only the output layer
# so the network predicts the 10 CIFAR-10 classes.
# The classifier is a two-element container; index 1 is its final linear layer.
# Read that layer's input width and install a new linear layer with the same
# number of inputs and 10 outputs.
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_features=num_ftrs, out_features=10)

# Place the whole model (including the new head) on the selected device
model = model.to(device)

# Print the architecture to confirm the final layer now has 10 outputs
print(model)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [7]:
# Freeze every parameter that does not belong to the classifier head
for param_name, parameter in model.named_parameters():
    if "classifier" in param_name:
        continue  # leave the classifier parameters as they are
    parameter.requires_grad = False

# Name prefixes of the feature stages to make trainable again (stages 5, 6 and 7)
layers_to_unfreeze = ['features.5', 'features.6', 'features.7']

# Hand the prefixes to the project helper, which re-enables those layers
# NOTE(review): the exact matching rule lives in utils.unfreeze_layers - confirm there
utils.unfreeze_layers(model, layers_to_unfreeze)

# Report, parameter by parameter, whether it will be updated during training
for name, param in model.named_parameters():
    if param.requires_grad:
        status = 'Trainable'
    else:
        status = 'Frozen'
    print(f"{name}: {status}")


features.0.0.weight: Frozen
features.0.1.weight: Frozen
features.0.1.bias: Frozen
features.1.0.block.0.0.weight: Frozen
features.1.0.block.0.1.weight: Frozen
features.1.0.block.0.1.bias: Frozen
features.1.0.block.1.fc1.weight: Frozen
features.1.0.block.1.fc1.bias: Frozen
features.1.0.block.1.fc2.weight: Frozen
features.1.0.block.1.fc2.bias: Frozen
features.1.0.block.2.0.weight: Frozen
features.1.0.block.2.1.weight: Frozen
features.1.0.block.2.1.bias: Frozen
features.2.0.block.0.0.weight: Frozen
features.2.0.block.0.1.weight: Frozen
features.2.0.block.0.1.bias: Frozen
features.2.0.block.1.0.weight: Frozen
features.2.0.block.1.1.weight: Frozen
features.2.0.block.1.1.bias: Frozen
features.2.0.block.2.fc1.weight: Frozen
features.2.0.block.2.fc1.bias: Frozen
features.2.0.block.2.fc2.weight: Frozen
features.2.0.block.2.fc2.bias: Frozen
features.2.0.block.3.0.weight: Frozen
features.2.0.block.3.1.weight: Frozen
features.2.0.block.3.1.bias: Frozen
features.2.1.block.0.0.weight: Frozen
features

In [8]:
# Collect only the parameters that are still trainable after freezing
trainable_params = [p for p in model.parameters() if p.requires_grad]

# SGD with momentum and weight decay over the trainable parameters only
optimizer = optim.SGD(trainable_params, lr=0.01, momentum=0.9, weight_decay=5e-4)

# Step schedule for fine-tuning: multiply the learning rate by 0.1 every 5 scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)


In [9]:
# Training loop: train, validate, step the LR schedule, and checkpoint the
# weights whenever the validation accuracy improves.
num_epochs = 10
best_val_acc = 0.0
save_path = './models/finetuned_base_model.pth'

# torch.save does not create missing directories. Make sure the checkpoint
# folder exists up front so a fresh checkout does not crash on the first save
# after a full epoch of training has already been spent.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)

# Initialize lists to store per-epoch metrics (one entry appended per epoch)
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []

for epoch in range(1, num_epochs + 1):
    print(f"--- Epoch {epoch} ---")
    # Train for one epoch; the helper returns (loss, accuracy)
    train_loss, train_acc = utils.train_epoch(model, device, train_loader, optimizer)
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)
    # Validate on the held-out split; the helper returns (loss, accuracy)
    val_loss, val_acc = utils.validate_epoch(model, device, val_loader)
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)
    # Step the scheduler once per epoch, after the epoch's optimizer updates
    scheduler.step()
    # Save the best model: only a strict improvement overwrites the checkpoint
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), save_path)
        print(f"Best model saved with Val Acc: {best_val_acc:.2f}%\n")
    else:
        print("No improvement this epoch.\n")

# NOTE(review): after the loop `model` holds the final-epoch weights, not
# necessarily the best ones; load `save_path` if the best checkpoint is needed.

--- Epoch 1 ---
Train Loss: 0.4990 | Train Acc: 84.21% | Time: 59.17s
Val Loss: 0.1865 | Val Acc: 93.52%
Best model saved with Val Acc: 93.52%

--- Epoch 2 ---
Train Loss: 0.1736 | Train Acc: 94.10% | Time: 57.07s
Val Loss: 0.1480 | Val Acc: 94.98%
Best model saved with Val Acc: 94.98%

--- Epoch 3 ---
Train Loss: 0.1189 | Train Acc: 96.01% | Time: 57.18s
Val Loss: 0.1340 | Val Acc: 95.56%
Best model saved with Val Acc: 95.56%

--- Epoch 4 ---
Train Loss: 0.0890 | Train Acc: 96.94% | Time: 57.12s
Val Loss: 0.1272 | Val Acc: 95.88%
Best model saved with Val Acc: 95.88%

--- Epoch 5 ---
Train Loss: 0.0658 | Train Acc: 97.80% | Time: 57.01s
Val Loss: 0.1265 | Val Acc: 95.86%
No improvement this epoch.

--- Epoch 6 ---
Train Loss: 0.0495 | Train Acc: 98.38% | Time: 56.78s
Val Loss: 0.1198 | Val Acc: 96.30%
Best model saved with Val Acc: 96.30%

--- Epoch 7 ---
Train Loss: 0.0446 | Train Acc: 98.55% | Time: 56.87s
Val Loss: 0.1198 | Val Acc: 96.26%
No improvement this epoch.

--- Epoch 8 --

In [12]:
# Per-epoch curves recorded during fine-tuning, grouped so they can be passed together
training_history = {
    'train_losses': train_losses,
    'train_accuracies': train_accuracies,
    'val_losses': val_losses,
    'val_accuracies': val_accuracies,
}

# Generate and save the metrics table and training/validation plots for the
# unpruned baseline (pruning_ratio=0.0) under the 'metrics_plots' directory
metrics.generate_and_save_metrics(
    model=model,
    device=device,
    test_loader=test_loader,
    criterion=utils.criterion,
    model_name='efficientnet_b0_base',
    pruning_ratio=0.0,
    description='Base model without pruning',
    **training_history,
    save_dir='metrics_plots'
)

--- Generating Metrics for efficientnet_b0_base ---
Calculating Model Size...
Measuring Inference Time...
Computing FLOPs...
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
Measuring Memory Usage...
Evaluating Model on Test Set...
Test Loss: 0.1358 | Test Accuracy: 95.97%
Generating Metrics Table...
Metrics table saved in 'metrics_plots' as 'efficientnet_b0_base_metrics_table.png'.
Generating Training and Validation Metrics Plots...
Training and validation metrics plots saved in 'metrics_plots' as 'efficientnet_b0_base_training_validation_plots.png