## Accelerate Inference: Neural Network Pruning (IMPROVED)

In [1]:
import os
import numpy as np
import cv2
import pickle
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Dataset
from torchsummary import summary

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
# untar
!ls
!tar -xvzf dataset.tar.gz
# load train
train_images = pickle.load(open('train_images.pkl', 'rb'))
train_labels = pickle.load(open('train_labels.pkl', 'rb'))
# load val
val_images = pickle.load(open('val_images.pkl', 'rb'))
val_labels = pickle.load(open('val_labels.pkl', 'rb'))

dataset.tar.gz	sample_data
train_images.pkl
train_labels.pkl
val_images.pkl
val_labels.pkl


In [4]:
train_images = torch.tensor(train_images, dtype=torch.float32)
val_images = torch.tensor(val_images, dtype=torch.float32)

train_images = train_images.permute(0, 3, 1, 2)
val_images = val_images.permute(0, 3, 1, 2)

print(f"Train images shape: {train_images.shape}")
print(f"Val images shape: {val_images.shape}")

Train images shape: torch.Size([22475, 3, 25, 25])
Val images shape: torch.Size([2525, 3, 25, 25])


In [5]:
class AugmentedDataset(Dataset):
    def __init__(self, images, labels, is_train=True):
        self.images = images
        self.labels = labels
        self.is_train = is_train

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx]
        label = self.labels[idx]

        if self.is_train:
            # Random horizontal flip
            if torch.rand(1) > 0.5:
                img = torch.flip(img, [2])  # Flip along width dimension

            # Random brightness and contrast adjustment
            if torch.rand(1) > 0.3:
                brightness_factor = 0.8 + torch.rand(1).item() * 0.4  # [0.8, 1.2]
                img = img * brightness_factor
                img = torch.clamp(img, 0, 255)

            # Random noise
            if torch.rand(1) > 0.5:
                noise = torch.randn_like(img) * 2.0
                img = img + noise
                img = torch.clamp(img, 0, 255)

        return img, label

# Create augmented datasets
train_dataset = AugmentedDataset(
    train_images,
    torch.tensor(train_labels.squeeze(), dtype=torch.long),
    is_train=True
)
val_dataset = AugmentedDataset(
    val_images,
    torch.tensor(val_labels.squeeze(), dtype=torch.long),
    is_train=False
)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Val dataset size: {len(val_dataset)}")

Train dataset size: 22475
Val dataset size: 2525


In [6]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [7]:
def create_model_with_bn(cfg=None):
    if cfg is None:
        cfg = [32, 32, 64, 64]

    model = nn.Sequential(
        nn.Conv2d(3, cfg[0], kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(cfg[0]),
        nn.ReLU(),
        nn.Conv2d(cfg[0], cfg[1], kernel_size=3, padding=0, bias=False),
        nn.BatchNorm2d(cfg[1]),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(0.25),

        nn.Conv2d(cfg[1], cfg[2], kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(cfg[2]),
        nn.ReLU(),
        nn.Conv2d(cfg[2], cfg[3], kernel_size=3, padding=0, bias=False),
        nn.BatchNorm2d(cfg[3]),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(0.25),

        nn.Flatten(),
        nn.Linear(cfg[3] * 4 * 4, 512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, 5),
    )
    return model

In [8]:
def train_one_epoch_with_bn_reg(model, train_loader, optimizer, criterion, device, l1_lambda=0.0):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    train_loader_tqdm = tqdm(train_loader, desc="Training", leave=False)

    for inputs, labels in train_loader_tqdm:
        optimizer.zero_grad()
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        if l1_lambda > 0:
            bn_regularization = 0.0
            for module in model.modules():
                if isinstance(module, nn.BatchNorm2d):
                    bn_regularization += torch.sum(torch.abs(module.weight))
            loss = loss + l1_lambda * bn_regularization

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        train_loader_tqdm.set_postfix(loss=running_loss / total, accuracy=100 * correct / total)

    return running_loss / len(train_loader), 100 * correct / total

In [9]:
def validate(model, val_loader, criterion, device):
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    val_loader_tqdm = tqdm(val_loader, desc="Validation", leave=False)

    with torch.no_grad():
        for inputs, labels in val_loader_tqdm:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            val_loader_tqdm.set_postfix(loss=val_loss / total, accuracy=100 * correct / total)

    return val_loss / len(val_loader), 100 * correct / total

In [10]:
def gather_bn_weights(model):
    bn_weights = []
    for module in model.modules():
        if isinstance(module, nn.BatchNorm2d):
            bn_weights.append(module.weight.data.abs().clone())
    return torch.cat(bn_weights)


def compute_pruning_threshold(model, percent):
    """
    Enhanced adaptive threshold selection
    """
    all_bn_weights = gather_bn_weights(model)
    sorted_bn = torch.sort(all_bn_weights)[0]

    threshold_index = int(len(sorted_bn) * percent / 100)
    percentile_threshold = sorted_bn[threshold_index]

    mean_weight = all_bn_weights.mean()
    std_weight = all_bn_weights.std()

    if abs(percentile_threshold - mean_weight) < 0.5 * std_weight:
        adjusted_threshold = mean_weight - (percent / 100.0) * std_weight
        threshold = max(adjusted_threshold, sorted_bn[min(threshold_index, len(sorted_bn)-1)])
    else:
        threshold = percentile_threshold

    return threshold


def prune_model(model, percent):
    threshold = compute_pruning_threshold(model, percent)
    print(f"Pruning threshold: {threshold:.6f}")

    cfg_mask = []
    cfg = []
    for module in model.modules():
        if isinstance(module, nn.BatchNorm2d):
            weight_copy = module.weight.data.abs().clone()
            mask = weight_copy.gt(threshold).float()
            cfg_mask.append(mask)
            cfg.append(int(torch.sum(mask)))

    conv_layers = []
    bn_layers = []
    linear_layers = []

    for module in model.modules():
        if isinstance(module, nn.Conv2d):
            conv_layers.append(module)
        elif isinstance(module, nn.BatchNorm2d):
            bn_layers.append(module)
        elif isinstance(module, nn.Linear):
            linear_layers.append(module)

    for i, (conv, bn, mask) in enumerate(zip(conv_layers, bn_layers, cfg_mask)):
        bn.weight.data.mul_(mask)
        bn.bias.data.mul_(mask)

        for j in range(len(mask)):
            if mask[j] == 0:
                conv.weight.data[j, :, :, :] = 0

        if i + 1 < len(conv_layers):
            next_conv = conv_layers[i + 1]
            for j in range(len(mask)):
                if mask[j] == 0:
                    next_conv.weight.data[:, j, :, :] = 0

    if len(linear_layers) > 0 and len(cfg_mask) > 0:
        first_linear = linear_layers[0]
        last_mask = cfg_mask[-1]
        spatial_size = 4 * 4

        for j in range(len(last_mask)):
            if last_mask[j] == 0:
                start_idx = j * spatial_size
                end_idx = (j + 1) * spatial_size
                first_linear.weight.data[:, start_idx:end_idx] = 0

    return cfg


# IMPROVEMENT 3: Add Linear Layer Pruning
def prune_linear_layers(model, percent=50):
    """
    Prune linear layer weights by magnitude
    """
    print(f"\nPruning linear layers by {percent}%...")
    for module in model.modules():
        if isinstance(module, nn.Linear):
            weight = module.weight.data
            threshold = torch.quantile(torch.abs(weight), percent / 100.0)
            mask = torch.abs(weight) > threshold
            module.weight.data *= mask.float()

            # Also prune bias for pruned output neurons (optional)
            if module.bias is not None:
                row_sums = torch.abs(weight).sum(dim=1)
                bias_mask = row_sums > 0
                module.bias.data *= bias_mask.float()


def calculate_sparsity(model):
    """Calculate sparsity of the model."""
    num_zero = 0
    total_params = 0

    for param in model.parameters():
        total_params += param.numel()
        num_zero += torch.sum(param.data == 0).item()

    sparsity = num_zero / total_params if total_params > 0 else 0
    return sparsity, num_zero, total_params

In [11]:
# STEP 1: Train model with BN and L1 regularization
print("="*60)
print("STEP 1: Training with L1 regularization on BN")
print("="*60)

model_bn = create_model_with_bn()
model_bn = model_bn.to(device)

criterion = nn.CrossEntropyLoss()

# IMPROVEMENT 4: Extended training with warmup
num_epochs = 150  # Increased from 100
l1_lambda = 3e-4  # Slightly reduced for better initial accuracy

optimizer = torch.optim.Adam(model_bn.parameters(), lr=0.001, weight_decay=1e-6)

# Add warmup scheduler
from torch.optim.lr_scheduler import LinearLR, SequentialLR
warmup_epochs = 10
warmup_scheduler = LinearLR(optimizer, start_factor=0.1, total_iters=warmup_epochs)
main_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs-warmup_epochs)
scheduler = SequentialLR(optimizer, schedulers=[warmup_scheduler, main_scheduler], milestones=[warmup_epochs])

# IMPROVEMENT 5: Track best model
best_val_accuracy_step1 = 0
best_model_state_step1 = None

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    train_loss, train_accuracy = train_one_epoch_with_bn_reg(
        model_bn, train_loader, optimizer, criterion, device, l1_lambda
    )

    val_loss, val_accuracy = validate(model_bn, val_loader, criterion, device)

    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, '
          f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%, '
          f'LR: {scheduler.get_last_lr()[0]:.6f}')

    # Track best model
    if val_accuracy > best_val_accuracy_step1:
        best_val_accuracy_step1 = val_accuracy
        best_model_state_step1 = {k: v.cpu().clone() for k, v in model_bn.state_dict().items()}
        print(f"  *** New best: {best_val_accuracy_step1:.2f}% ***")

    scheduler.step()

# Load best model from step 1
if best_model_state_step1:
    print(f"\nLoading best model from training: {best_val_accuracy_step1:.2f}%")
    model_bn.load_state_dict({k: v.to(device) for k, v in best_model_state_step1.items()})

print("\nInitial model performance:")
val_loss, val_accuracy = validate(model_bn, val_loader, criterion, device)
print(f"Validation Accuracy: {val_accuracy:.2f}%")

STEP 1: Training with L1 regularization on BN
Epoch 1/150




Train Loss: 1.6685, Train Acc: 23.22%, Val Loss: 1.5119, Val Acc: 32.48%, LR: 0.000100
  *** New best: 32.48% ***
Epoch 2/150




Train Loss: 1.6082, Train Acc: 27.65%, Val Loss: 1.4153, Val Acc: 37.07%, LR: 0.000190
  *** New best: 37.07% ***
Epoch 3/150




Train Loss: 1.5751, Train Acc: 30.02%, Val Loss: 1.3783, Val Acc: 38.73%, LR: 0.000280
  *** New best: 38.73% ***
Epoch 4/150




Train Loss: 1.5561, Train Acc: 31.33%, Val Loss: 1.3176, Val Acc: 42.69%, LR: 0.000370
  *** New best: 42.69% ***
Epoch 5/150




Train Loss: 1.5340, Train Acc: 33.20%, Val Loss: 1.3181, Val Acc: 42.46%, LR: 0.000460
Epoch 6/150




Train Loss: 1.5167, Train Acc: 34.08%, Val Loss: 1.2262, Val Acc: 48.16%, LR: 0.000550
  *** New best: 48.16% ***
Epoch 7/150




Train Loss: 1.5009, Train Acc: 34.68%, Val Loss: 1.2412, Val Acc: 46.34%, LR: 0.000640
Epoch 8/150




Train Loss: 1.4892, Train Acc: 35.76%, Val Loss: 1.2112, Val Acc: 46.89%, LR: 0.000730
Epoch 9/150




Train Loss: 1.4784, Train Acc: 36.29%, Val Loss: 1.3403, Val Acc: 43.80%, LR: 0.000820
Epoch 10/150




Train Loss: 1.4608, Train Acc: 37.22%, Val Loss: 1.1569, Val Acc: 51.64%, LR: 0.000910
  *** New best: 51.64% ***
Epoch 11/150




Train Loss: 1.4495, Train Acc: 38.18%, Val Loss: 1.1554, Val Acc: 52.83%, LR: 0.001000
  *** New best: 52.83% ***
Epoch 12/150




Train Loss: 1.4374, Train Acc: 38.59%, Val Loss: 1.1179, Val Acc: 53.94%, LR: 0.001000
  *** New best: 53.94% ***
Epoch 13/150




Train Loss: 1.4302, Train Acc: 38.83%, Val Loss: 1.3129, Val Acc: 45.90%, LR: 0.000999
Epoch 14/150




Train Loss: 1.4075, Train Acc: 39.24%, Val Loss: 1.1331, Val Acc: 54.22%, LR: 0.000999
  *** New best: 54.22% ***
Epoch 15/150




Train Loss: 1.3998, Train Acc: 40.65%, Val Loss: 1.0687, Val Acc: 55.96%, LR: 0.000998
  *** New best: 55.96% ***
Epoch 16/150




Train Loss: 1.3881, Train Acc: 40.72%, Val Loss: 1.0418, Val Acc: 58.22%, LR: 0.000997
  *** New best: 58.22% ***
Epoch 17/150




Train Loss: 1.3792, Train Acc: 40.89%, Val Loss: 1.0751, Val Acc: 56.28%, LR: 0.000995
Epoch 18/150




Train Loss: 1.3665, Train Acc: 41.45%, Val Loss: 1.0310, Val Acc: 58.10%, LR: 0.000994
Epoch 19/150




Train Loss: 1.3582, Train Acc: 42.05%, Val Loss: 0.9813, Val Acc: 61.15%, LR: 0.000992
  *** New best: 61.15% ***
Epoch 20/150




Train Loss: 1.3433, Train Acc: 43.23%, Val Loss: 0.9742, Val Acc: 60.36%, LR: 0.000990
Epoch 21/150




Train Loss: 1.3363, Train Acc: 43.00%, Val Loss: 0.9605, Val Acc: 61.23%, LR: 0.000987
  *** New best: 61.23% ***
Epoch 22/150




Train Loss: 1.3276, Train Acc: 43.39%, Val Loss: 1.1138, Val Acc: 57.50%, LR: 0.000985
Epoch 23/150




Train Loss: 1.3177, Train Acc: 44.04%, Val Loss: 0.9620, Val Acc: 60.91%, LR: 0.000982
Epoch 24/150




Train Loss: 1.3183, Train Acc: 43.84%, Val Loss: 0.9289, Val Acc: 64.32%, LR: 0.000979
  *** New best: 64.32% ***
Epoch 25/150




Train Loss: 1.3019, Train Acc: 45.04%, Val Loss: 0.9028, Val Acc: 64.55%, LR: 0.000976
  *** New best: 64.55% ***
Epoch 26/150




Train Loss: 1.3006, Train Acc: 44.67%, Val Loss: 0.8821, Val Acc: 65.47%, LR: 0.000972
  *** New best: 65.47% ***
Epoch 27/150




Train Loss: 1.2935, Train Acc: 45.09%, Val Loss: 0.8838, Val Acc: 64.48%, LR: 0.000968
Epoch 28/150




Train Loss: 1.2844, Train Acc: 45.45%, Val Loss: 0.8596, Val Acc: 66.69%, LR: 0.000964
  *** New best: 66.69% ***
Epoch 29/150




Train Loss: 1.2781, Train Acc: 45.79%, Val Loss: 0.8757, Val Acc: 65.07%, LR: 0.000960
Epoch 30/150




Train Loss: 1.2769, Train Acc: 46.02%, Val Loss: 0.8498, Val Acc: 67.76%, LR: 0.000955
  *** New best: 67.76% ***
Epoch 31/150




Train Loss: 1.2658, Train Acc: 46.20%, Val Loss: 0.8434, Val Acc: 67.76%, LR: 0.000950
Epoch 32/150




Train Loss: 1.2583, Train Acc: 46.21%, Val Loss: 0.8443, Val Acc: 66.53%, LR: 0.000946
Epoch 33/150




Train Loss: 1.2554, Train Acc: 46.94%, Val Loss: 0.8212, Val Acc: 67.96%, LR: 0.000940
  *** New best: 67.96% ***
Epoch 34/150




Train Loss: 1.2643, Train Acc: 45.82%, Val Loss: 0.8401, Val Acc: 67.05%, LR: 0.000935
Epoch 35/150




Train Loss: 1.2388, Train Acc: 47.15%, Val Loss: 0.8326, Val Acc: 68.12%, LR: 0.000929
  *** New best: 68.12% ***
Epoch 36/150




Train Loss: 1.2398, Train Acc: 47.26%, Val Loss: 0.8118, Val Acc: 69.47%, LR: 0.000923
  *** New best: 69.47% ***
Epoch 37/150




Train Loss: 1.2416, Train Acc: 47.19%, Val Loss: 0.7925, Val Acc: 70.26%, LR: 0.000917
  *** New best: 70.26% ***
Epoch 38/150




Train Loss: 1.2369, Train Acc: 47.68%, Val Loss: 0.8508, Val Acc: 68.32%, LR: 0.000911
Epoch 39/150




Train Loss: 1.2250, Train Acc: 46.95%, Val Loss: 0.7939, Val Acc: 69.54%, LR: 0.000905
Epoch 40/150




Train Loss: 1.2300, Train Acc: 47.43%, Val Loss: 0.7884, Val Acc: 69.50%, LR: 0.000898
Epoch 41/150




Train Loss: 1.2242, Train Acc: 47.94%, Val Loss: 0.7563, Val Acc: 70.77%, LR: 0.000891
  *** New best: 70.77% ***
Epoch 42/150




Train Loss: 1.2184, Train Acc: 48.10%, Val Loss: 0.7839, Val Acc: 69.70%, LR: 0.000884
Epoch 43/150




Train Loss: 1.2135, Train Acc: 48.79%, Val Loss: 0.7779, Val Acc: 70.30%, LR: 0.000877
Epoch 44/150




Train Loss: 1.2141, Train Acc: 47.94%, Val Loss: 0.7621, Val Acc: 70.93%, LR: 0.000869
  *** New best: 70.93% ***
Epoch 45/150




Train Loss: 1.1948, Train Acc: 49.09%, Val Loss: 0.7598, Val Acc: 71.52%, LR: 0.000861
  *** New best: 71.52% ***
Epoch 46/150




Train Loss: 1.1990, Train Acc: 49.09%, Val Loss: 0.7542, Val Acc: 71.88%, LR: 0.000854
  *** New best: 71.88% ***
Epoch 47/150




Train Loss: 1.2008, Train Acc: 48.61%, Val Loss: 0.7720, Val Acc: 71.01%, LR: 0.000846
Epoch 48/150




Train Loss: 1.1960, Train Acc: 48.78%, Val Loss: 0.7564, Val Acc: 71.68%, LR: 0.000837
Epoch 49/150




Train Loss: 1.1978, Train Acc: 48.57%, Val Loss: 0.7349, Val Acc: 72.67%, LR: 0.000829
  *** New best: 72.67% ***
Epoch 50/150




Train Loss: 1.1884, Train Acc: 49.62%, Val Loss: 0.7284, Val Acc: 72.75%, LR: 0.000820
  *** New best: 72.75% ***
Epoch 51/150




Train Loss: 1.1823, Train Acc: 49.49%, Val Loss: 0.7365, Val Acc: 72.63%, LR: 0.000812
Epoch 52/150




Train Loss: 1.1828, Train Acc: 49.30%, Val Loss: 0.7206, Val Acc: 73.19%, LR: 0.000803
  *** New best: 73.19% ***
Epoch 53/150




Train Loss: 1.1773, Train Acc: 49.88%, Val Loss: 0.7278, Val Acc: 73.07%, LR: 0.000794
Epoch 54/150




Train Loss: 1.1822, Train Acc: 49.58%, Val Loss: 0.7765, Val Acc: 71.21%, LR: 0.000785
Epoch 55/150




Train Loss: 1.1836, Train Acc: 49.40%, Val Loss: 0.7192, Val Acc: 73.31%, LR: 0.000775
  *** New best: 73.31% ***
Epoch 56/150




Train Loss: 1.1689, Train Acc: 50.30%, Val Loss: 0.7138, Val Acc: 74.06%, LR: 0.000766
  *** New best: 74.06% ***
Epoch 57/150




Train Loss: 1.1631, Train Acc: 50.48%, Val Loss: 0.7496, Val Acc: 71.60%, LR: 0.000756
Epoch 58/150




Train Loss: 1.1686, Train Acc: 49.90%, Val Loss: 0.7093, Val Acc: 73.90%, LR: 0.000747
Epoch 59/150




Train Loss: 1.1790, Train Acc: 49.29%, Val Loss: 0.7005, Val Acc: 73.98%, LR: 0.000737
Epoch 60/150




Train Loss: 1.1631, Train Acc: 50.36%, Val Loss: 0.7153, Val Acc: 74.34%, LR: 0.000727
  *** New best: 74.34% ***
Epoch 61/150




Train Loss: 1.1615, Train Acc: 50.58%, Val Loss: 0.7001, Val Acc: 74.38%, LR: 0.000717
  *** New best: 74.38% ***
Epoch 62/150




Train Loss: 1.1552, Train Acc: 51.04%, Val Loss: 0.6892, Val Acc: 74.42%, LR: 0.000707
  *** New best: 74.42% ***
Epoch 63/150




Train Loss: 1.1537, Train Acc: 50.89%, Val Loss: 0.6933, Val Acc: 74.38%, LR: 0.000697
Epoch 64/150




Train Loss: 1.1546, Train Acc: 50.99%, Val Loss: 0.6913, Val Acc: 74.38%, LR: 0.000686
Epoch 65/150




Train Loss: 1.1451, Train Acc: 51.00%, Val Loss: 0.6870, Val Acc: 74.53%, LR: 0.000676
  *** New best: 74.53% ***
Epoch 66/150




Train Loss: 1.1430, Train Acc: 51.31%, Val Loss: 0.6928, Val Acc: 74.26%, LR: 0.000665
Epoch 67/150




Train Loss: 1.1515, Train Acc: 50.75%, Val Loss: 0.6947, Val Acc: 74.38%, LR: 0.000655
Epoch 68/150




Train Loss: 1.1406, Train Acc: 51.76%, Val Loss: 0.6822, Val Acc: 74.77%, LR: 0.000644
  *** New best: 74.77% ***
Epoch 69/150




Train Loss: 1.1318, Train Acc: 51.74%, Val Loss: 0.6978, Val Acc: 74.46%, LR: 0.000633
Epoch 70/150




Train Loss: 1.1351, Train Acc: 51.15%, Val Loss: 0.7078, Val Acc: 73.66%, LR: 0.000622
Epoch 71/150




Train Loss: 1.1308, Train Acc: 51.72%, Val Loss: 0.6782, Val Acc: 75.29%, LR: 0.000611
  *** New best: 75.29% ***
Epoch 72/150




Train Loss: 1.1350, Train Acc: 51.48%, Val Loss: 0.6823, Val Acc: 75.05%, LR: 0.000600
Epoch 73/150




Train Loss: 1.1285, Train Acc: 52.04%, Val Loss: 0.6790, Val Acc: 75.17%, LR: 0.000589
Epoch 74/150




Train Loss: 1.1238, Train Acc: 51.33%, Val Loss: 0.6744, Val Acc: 75.84%, LR: 0.000578
  *** New best: 75.84% ***
Epoch 75/150




Train Loss: 1.1260, Train Acc: 51.54%, Val Loss: 0.6882, Val Acc: 75.49%, LR: 0.000567
Epoch 76/150




Train Loss: 1.1168, Train Acc: 52.37%, Val Loss: 0.6684, Val Acc: 75.76%, LR: 0.000556
Epoch 77/150




Train Loss: 1.1094, Train Acc: 52.28%, Val Loss: 0.6614, Val Acc: 76.40%, LR: 0.000545
  *** New best: 76.40% ***
Epoch 78/150




Train Loss: 1.1293, Train Acc: 51.35%, Val Loss: 0.6675, Val Acc: 75.52%, LR: 0.000534
Epoch 79/150




Train Loss: 1.1084, Train Acc: 52.69%, Val Loss: 0.6776, Val Acc: 75.56%, LR: 0.000522
Epoch 80/150




Train Loss: 1.1145, Train Acc: 52.68%, Val Loss: 0.6790, Val Acc: 74.57%, LR: 0.000511
Epoch 81/150




Train Loss: 1.1130, Train Acc: 52.45%, Val Loss: 0.6632, Val Acc: 75.56%, LR: 0.000500
Epoch 82/150




Train Loss: 1.1043, Train Acc: 52.96%, Val Loss: 0.6864, Val Acc: 75.80%, LR: 0.000489
Epoch 83/150




Train Loss: 1.1010, Train Acc: 52.78%, Val Loss: 0.6689, Val Acc: 75.80%, LR: 0.000478
Epoch 84/150




Train Loss: 1.1030, Train Acc: 53.09%, Val Loss: 0.6816, Val Acc: 74.53%, LR: 0.000466
Epoch 85/150




Train Loss: 1.0991, Train Acc: 53.45%, Val Loss: 0.6735, Val Acc: 75.88%, LR: 0.000455
Epoch 86/150




Train Loss: 1.1016, Train Acc: 52.55%, Val Loss: 0.6668, Val Acc: 76.24%, LR: 0.000444
Epoch 87/150




Train Loss: 1.0928, Train Acc: 53.62%, Val Loss: 0.6642, Val Acc: 76.55%, LR: 0.000433
  *** New best: 76.55% ***
Epoch 88/150




Train Loss: 1.0976, Train Acc: 53.20%, Val Loss: 0.6592, Val Acc: 75.68%, LR: 0.000422
Epoch 89/150




Train Loss: 1.0965, Train Acc: 53.39%, Val Loss: 0.6518, Val Acc: 76.44%, LR: 0.000411
Epoch 90/150




Train Loss: 1.0955, Train Acc: 53.17%, Val Loss: 0.6568, Val Acc: 76.16%, LR: 0.000400
Epoch 91/150




Train Loss: 1.0930, Train Acc: 53.38%, Val Loss: 0.6624, Val Acc: 75.92%, LR: 0.000389
Epoch 92/150




Train Loss: 1.0839, Train Acc: 53.87%, Val Loss: 0.6450, Val Acc: 76.04%, LR: 0.000378
Epoch 93/150




Train Loss: 1.0848, Train Acc: 53.45%, Val Loss: 0.6601, Val Acc: 76.04%, LR: 0.000367
Epoch 94/150




Train Loss: 1.0792, Train Acc: 53.90%, Val Loss: 0.6661, Val Acc: 75.17%, LR: 0.000356
Epoch 95/150




Train Loss: 1.0834, Train Acc: 53.63%, Val Loss: 0.6462, Val Acc: 76.28%, LR: 0.000345
Epoch 96/150




Train Loss: 1.0861, Train Acc: 53.62%, Val Loss: 0.6542, Val Acc: 75.80%, LR: 0.000335
Epoch 97/150




Train Loss: 1.0786, Train Acc: 53.76%, Val Loss: 0.6520, Val Acc: 75.68%, LR: 0.000324
Epoch 98/150




Train Loss: 1.0799, Train Acc: 53.88%, Val Loss: 0.6527, Val Acc: 76.12%, LR: 0.000314
Epoch 99/150




Train Loss: 1.0840, Train Acc: 53.67%, Val Loss: 0.6462, Val Acc: 76.63%, LR: 0.000303
  *** New best: 76.63% ***
Epoch 100/150




Train Loss: 1.0761, Train Acc: 54.34%, Val Loss: 0.6609, Val Acc: 76.04%, LR: 0.000293
Epoch 101/150




Train Loss: 1.0698, Train Acc: 54.53%, Val Loss: 0.6441, Val Acc: 76.63%, LR: 0.000283
Epoch 102/150




Train Loss: 1.0778, Train Acc: 53.98%, Val Loss: 0.6320, Val Acc: 76.83%, LR: 0.000273
  *** New best: 76.83% ***
Epoch 103/150




Train Loss: 1.0818, Train Acc: 53.66%, Val Loss: 0.6576, Val Acc: 75.96%, LR: 0.000263
Epoch 104/150




Train Loss: 1.0660, Train Acc: 54.21%, Val Loss: 0.6380, Val Acc: 76.40%, LR: 0.000253
Epoch 105/150




Train Loss: 1.0698, Train Acc: 54.42%, Val Loss: 0.6420, Val Acc: 76.99%, LR: 0.000244
  *** New best: 76.99% ***
Epoch 106/150




Train Loss: 1.0636, Train Acc: 54.26%, Val Loss: 0.6632, Val Acc: 76.00%, LR: 0.000234
Epoch 107/150




Train Loss: 1.0641, Train Acc: 54.61%, Val Loss: 0.6472, Val Acc: 76.36%, LR: 0.000225
Epoch 108/150




Train Loss: 1.0672, Train Acc: 54.39%, Val Loss: 0.6359, Val Acc: 77.39%, LR: 0.000215
  *** New best: 77.39% ***
Epoch 109/150




Train Loss: 1.0604, Train Acc: 54.58%, Val Loss: 0.6383, Val Acc: 76.59%, LR: 0.000206
Epoch 110/150




Train Loss: 1.0682, Train Acc: 54.42%, Val Loss: 0.6328, Val Acc: 77.03%, LR: 0.000197
Epoch 111/150




Train Loss: 1.0613, Train Acc: 54.41%, Val Loss: 0.6427, Val Acc: 76.59%, LR: 0.000188
Epoch 112/150




Train Loss: 1.0556, Train Acc: 54.75%, Val Loss: 0.6327, Val Acc: 77.07%, LR: 0.000180
Epoch 113/150




Train Loss: 1.0636, Train Acc: 54.53%, Val Loss: 0.6459, Val Acc: 76.75%, LR: 0.000171
Epoch 114/150




Train Loss: 1.0474, Train Acc: 55.72%, Val Loss: 0.6334, Val Acc: 76.83%, LR: 0.000163
Epoch 115/150




Train Loss: 1.0587, Train Acc: 54.54%, Val Loss: 0.6337, Val Acc: 77.39%, LR: 0.000154
Epoch 116/150




Train Loss: 1.0530, Train Acc: 55.17%, Val Loss: 0.6321, Val Acc: 77.39%, LR: 0.000146
Epoch 117/150




Train Loss: 1.0459, Train Acc: 55.04%, Val Loss: 0.6381, Val Acc: 76.71%, LR: 0.000139
Epoch 118/150




Train Loss: 1.0574, Train Acc: 54.42%, Val Loss: 0.6426, Val Acc: 77.31%, LR: 0.000131
Epoch 119/150




Train Loss: 1.0529, Train Acc: 55.20%, Val Loss: 0.6345, Val Acc: 77.54%, LR: 0.000123
  *** New best: 77.54% ***
Epoch 120/150




Train Loss: 1.0405, Train Acc: 55.40%, Val Loss: 0.6309, Val Acc: 77.03%, LR: 0.000116
Epoch 121/150




Train Loss: 1.0422, Train Acc: 55.18%, Val Loss: 0.6477, Val Acc: 76.79%, LR: 0.000109
Epoch 122/150




Train Loss: 1.0452, Train Acc: 55.10%, Val Loss: 0.6433, Val Acc: 76.71%, LR: 0.000102
Epoch 123/150




Train Loss: 1.0473, Train Acc: 55.07%, Val Loss: 0.6414, Val Acc: 76.63%, LR: 0.000095
Epoch 124/150




Train Loss: 1.0417, Train Acc: 55.35%, Val Loss: 0.6417, Val Acc: 76.99%, LR: 0.000089
Epoch 125/150




Train Loss: 1.0427, Train Acc: 55.01%, Val Loss: 0.6278, Val Acc: 77.39%, LR: 0.000083
Epoch 126/150




Train Loss: 1.0474, Train Acc: 55.27%, Val Loss: 0.6309, Val Acc: 78.02%, LR: 0.000077
  *** New best: 78.02% ***
Epoch 127/150




Train Loss: 1.0381, Train Acc: 55.71%, Val Loss: 0.6313, Val Acc: 76.87%, LR: 0.000071
Epoch 128/150




Train Loss: 1.0428, Train Acc: 54.97%, Val Loss: 0.6266, Val Acc: 77.23%, LR: 0.000065
Epoch 129/150




Train Loss: 1.0547, Train Acc: 54.83%, Val Loss: 0.6321, Val Acc: 77.58%, LR: 0.000060
Epoch 130/150




Train Loss: 1.0436, Train Acc: 55.48%, Val Loss: 0.6346, Val Acc: 76.91%, LR: 0.000054
Epoch 131/150




Train Loss: 1.0377, Train Acc: 55.14%, Val Loss: 0.6405, Val Acc: 77.62%, LR: 0.000050
Epoch 132/150




Train Loss: 1.0458, Train Acc: 55.08%, Val Loss: 0.6405, Val Acc: 76.75%, LR: 0.000045
Epoch 133/150




Train Loss: 1.0435, Train Acc: 55.15%, Val Loss: 0.6420, Val Acc: 77.07%, LR: 0.000040
Epoch 134/150




Train Loss: 1.0387, Train Acc: 55.67%, Val Loss: 0.6290, Val Acc: 77.66%, LR: 0.000036
Epoch 135/150




Train Loss: 1.0346, Train Acc: 55.28%, Val Loss: 0.6309, Val Acc: 77.19%, LR: 0.000032
Epoch 136/150




Train Loss: 1.0436, Train Acc: 55.55%, Val Loss: 0.6257, Val Acc: 77.58%, LR: 0.000028
Epoch 137/150




Train Loss: 1.0330, Train Acc: 55.39%, Val Loss: 0.6372, Val Acc: 77.43%, LR: 0.000024
Epoch 138/150




Train Loss: 1.0412, Train Acc: 55.37%, Val Loss: 0.6275, Val Acc: 77.31%, LR: 0.000021
Epoch 139/150




Train Loss: 1.0403, Train Acc: 55.65%, Val Loss: 0.6314, Val Acc: 77.50%, LR: 0.000018
Epoch 140/150




Train Loss: 1.0428, Train Acc: 55.11%, Val Loss: 0.6346, Val Acc: 77.03%, LR: 0.000015
Epoch 141/150




Train Loss: 1.0284, Train Acc: 55.86%, Val Loss: 0.6315, Val Acc: 77.50%, LR: 0.000013
Epoch 142/150




Train Loss: 1.0318, Train Acc: 55.72%, Val Loss: 0.6291, Val Acc: 77.47%, LR: 0.000010
Epoch 143/150




Train Loss: 1.0378, Train Acc: 55.23%, Val Loss: 0.6263, Val Acc: 77.39%, LR: 0.000008
Epoch 144/150




Train Loss: 1.0371, Train Acc: 55.28%, Val Loss: 0.6252, Val Acc: 77.07%, LR: 0.000006
Epoch 145/150




Train Loss: 1.0387, Train Acc: 55.37%, Val Loss: 0.6310, Val Acc: 77.35%, LR: 0.000005
Epoch 146/150




Train Loss: 1.0431, Train Acc: 54.99%, Val Loss: 0.6272, Val Acc: 77.35%, LR: 0.000003
Epoch 147/150




Train Loss: 1.0383, Train Acc: 55.54%, Val Loss: 0.6271, Val Acc: 76.95%, LR: 0.000002
Epoch 148/150




Train Loss: 1.0390, Train Acc: 55.57%, Val Loss: 0.6287, Val Acc: 77.19%, LR: 0.000001
Epoch 149/150




Train Loss: 1.0405, Train Acc: 55.29%, Val Loss: 0.6298, Val Acc: 77.35%, LR: 0.000001
Epoch 150/150




Train Loss: 1.0241, Train Acc: 56.21%, Val Loss: 0.6274, Val Acc: 77.43%, LR: 0.000000

Loading best model from training: 78.02%

Initial model performance:


                                                                                      

Validation Accuracy: 78.02%




In [12]:
# STEP 2: Gradual Iterative Pruning
print("\n" + "="*60)
print("STEP 2: Gradual Iterative Pruning")
print("="*60)

bn_weights = gather_bn_weights(model_bn).cpu().numpy()
print(f"BN weights - Min: {bn_weights.min():.6f}, Max: {bn_weights.max():.6f}, "
      f"Mean: {bn_weights.mean():.6f}, Median: {np.median(bn_weights):.6f}")

pruning_schedule = [25, 40, 50, 60, 65]
finetune_epochs_per_step = 30

optimizer = torch.optim.Adam(model_bn.parameters(), lr=0.0005, weight_decay=5e-4)

best_overall_model = None
best_overall_accuracy = 0

for prune_percent in pruning_schedule:
    print(f"\n{'='*60}")
    print(f"Pruning to {prune_percent}%...")
    print(f"{'='*60}")

    cfg = prune_model(model_bn, prune_percent)
    print(f"Channel configuration after {prune_percent}% pruning: {cfg}")

    sparsity, num_zero, total_params = calculate_sparsity(model_bn)
    print(f"Sparsity: {100*sparsity:.2f}% ({num_zero}/{total_params} zeros)")

    val_loss, val_accuracy = validate(model_bn, val_loader, criterion, device)
    print(f"Validation Accuracy after pruning: {val_accuracy:.2f}%")

    masks = {}
    for name, param in model_bn.named_parameters():
        masks[name] = (param.data != 0).float().to(device)

    print(f"\nFine-tuning for {finetune_epochs_per_step} epochs...")
    best_in_step = 0
    for epoch in range(finetune_epochs_per_step):
        train_loss, train_accuracy = train_one_epoch_with_bn_reg(
            model_bn, train_loader, optimizer, criterion, device, l1_lambda=0
        )

        with torch.no_grad():
            for name, param in model_bn.named_parameters():
                param.data.mul_(masks[name])

        if (epoch + 1) % 5 == 0:
            val_loss, val_accuracy = validate(model_bn, val_loader, criterion, device)
            print(f"  Epoch {epoch+1}/{finetune_epochs_per_step}: "
                  f"Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%")

            # Track best in this step
            if val_accuracy > best_in_step:
                best_in_step = val_accuracy

    val_loss, val_accuracy = validate(model_bn, val_loader, criterion, device)
    print(f"\nAfter {prune_percent}% pruning + fine-tuning: Val Acc = {val_accuracy:.2f}%")

    # Track best overall model
    if val_accuracy > best_overall_accuracy:
        best_overall_accuracy = val_accuracy
        best_overall_model = {k: v.cpu().clone() for k, v in model_bn.state_dict().items()}
        print(f"  *** New overall best: {best_overall_accuracy:.2f}% ***")

print("\n" + "="*60)
print("PRUNING RESULTS BEFORE LINEAR LAYER PRUNING")
print("="*60)
sparsity, num_zero, total_params = calculate_sparsity(model_bn)
val_loss, val_accuracy = validate(model_bn, val_loader, criterion, device)
print(f"Validation Accuracy: {val_accuracy:.2f}%")
print(f"Sparsity: {100*sparsity:.2f}% ({num_zero}/{total_params} zeros)")


STEP 2: Gradual Iterative Pruning
BN weights - Min: 0.000002, Max: 1.066803, Mean: 0.399659, Median: 0.376439

Pruning to 25%...
Pruning threshold: 0.296871
Channel configuration after 25% pruning: [22, 29, 50, 42]
Sparsity: 34.97% (207412/593125 zeros)




Validation Accuracy after pruning: 70.50%

Fine-tuning for 30 epochs...




  Epoch 5/30: Train Acc: 48.57%, Val Acc: 75.52%




  Epoch 10/30: Train Acc: 49.14%, Val Acc: 74.30%




  Epoch 15/30: Train Acc: 49.27%, Val Acc: 73.74%




  Epoch 20/30: Train Acc: 49.59%, Val Acc: 73.19%




  Epoch 25/30: Train Acc: 49.59%, Val Acc: 74.10%




  Epoch 30/30: Train Acc: 49.31%, Val Acc: 73.19%





After 25% pruning + fine-tuning: Val Acc = 73.19%
  *** New overall best: 73.19% ***

Pruning to 40%...
Pruning threshold: 0.357970
Channel configuration after 40% pruning: [22, 26, 44, 23]
Sparsity: 63.44% (376256/593125 zeros)




Validation Accuracy after pruning: 62.46%

Fine-tuning for 30 epochs...




  Epoch 5/30: Train Acc: 46.45%, Val Acc: 72.71%




  Epoch 10/30: Train Acc: 47.52%, Val Acc: 73.35%




  Epoch 15/30: Train Acc: 46.94%, Val Acc: 72.59%




  Epoch 20/30: Train Acc: 47.38%, Val Acc: 73.74%




  Epoch 25/30: Train Acc: 47.56%, Val Acc: 71.64%




  Epoch 30/30: Train Acc: 47.42%, Val Acc: 72.00%





After 40% pruning + fine-tuning: Val Acc = 72.00%

Pruning to 50%...
Pruning threshold: 0.374534
Channel configuration after 50% pruning: [19, 26, 30, 20]
Sparsity: 68.90% (408639/593125 zeros)




Validation Accuracy after pruning: 62.22%

Fine-tuning for 30 epochs...




  Epoch 5/30: Train Acc: 46.19%, Val Acc: 69.94%




  Epoch 10/30: Train Acc: 46.23%, Val Acc: 70.50%




  Epoch 15/30: Train Acc: 47.49%, Val Acc: 71.96%




  Epoch 20/30: Train Acc: 46.75%, Val Acc: 72.63%




  Epoch 25/30: Train Acc: 46.99%, Val Acc: 70.53%




  Epoch 30/30: Train Acc: 46.71%, Val Acc: 68.28%





After 50% pruning + fine-tuning: Val Acc = 68.28%

Pruning to 60%...
Pruning threshold: 0.408282
Channel configuration after 60% pruning: [14, 25, 20, 17]
Sparsity: 74.11% (439544/593125 zeros)




Validation Accuracy after pruning: 34.97%

Fine-tuning for 30 epochs...




  Epoch 5/30: Train Acc: 44.62%, Val Acc: 69.70%




  Epoch 10/30: Train Acc: 45.24%, Val Acc: 68.83%




  Epoch 15/30: Train Acc: 45.75%, Val Acc: 64.51%




  Epoch 20/30: Train Acc: 46.02%, Val Acc: 69.94%




  Epoch 25/30: Train Acc: 45.90%, Val Acc: 68.28%




  Epoch 30/30: Train Acc: 45.12%, Val Acc: 70.77%





After 60% pruning + fine-tuning: Val Acc = 70.77%

Pruning to 65%...
Pruning threshold: 0.423807
Channel configuration after 65% pruning: [13, 24, 15, 15]
Sparsity: 77.32% (458610/593125 zeros)




Validation Accuracy after pruning: 47.88%

Fine-tuning for 30 epochs...




  Epoch 5/30: Train Acc: 44.40%, Val Acc: 69.19%




  Epoch 10/30: Train Acc: 44.90%, Val Acc: 68.91%




  Epoch 15/30: Train Acc: 45.25%, Val Acc: 68.67%




  Epoch 20/30: Train Acc: 44.90%, Val Acc: 68.71%




  Epoch 25/30: Train Acc: 45.34%, Val Acc: 69.31%




  Epoch 30/30: Train Acc: 45.23%, Val Acc: 69.62%





After 65% pruning + fine-tuning: Val Acc = 69.62%

PRUNING RESULTS BEFORE LINEAR LAYER PRUNING


                                                                                        

Validation Accuracy: 69.62%
Sparsity: 77.32% (458610/593125 zeros)




In [13]:
print("\n" + "="*60)
print("STEP 2.5: Linear Layer Pruning")
print("="*60)

# Prune linear layers
prune_linear_layers(model_bn, percent=60)

sparsity, num_zero, total_params = calculate_sparsity(model_bn)
print(f"Sparsity after linear pruning: {100*sparsity:.2f}% ({num_zero}/{total_params} zeros)")

val_loss, val_accuracy = validate(model_bn, val_loader, criterion, device)
print(f"Validation Accuracy after linear pruning: {val_accuracy:.2f}%")

# Update masks to include linear layer pruning
masks = {}
for name, param in model_bn.named_parameters():
    masks[name] = (param.data != 0).float().to(device)

# Quick fine-tune after linear pruning
print("\nFine-tuning after linear layer pruning (20 epochs)...")
optimizer = torch.optim.Adam(model_bn.parameters(), lr=0.0003, weight_decay=5e-4)

for epoch in range(20):
    train_loss, train_accuracy = train_one_epoch_with_bn_reg(
        model_bn, train_loader, optimizer, criterion, device, l1_lambda=0
    )

    with torch.no_grad():
        for name, param in model_bn.named_parameters():
            param.data.mul_(masks[name])

    if (epoch + 1) % 5 == 0:
        val_loss, val_accuracy = validate(model_bn, val_loader, criterion, device)
        print(f"  Epoch {epoch+1}/20: Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%")

val_loss, val_accuracy = validate(model_bn, val_loader, criterion, device)
sparsity, num_zero, total_params = calculate_sparsity(model_bn)
print(f"\nAfter linear pruning + fine-tuning:")
print(f"Validation Accuracy: {val_accuracy:.2f}%")
print(f"Sparsity: {100*sparsity:.2f}% ({num_zero}/{total_params} zeros)")


STEP 2.5: Linear Layer Pruning

Pruning linear layers by 60%...
Sparsity after linear pruning: 77.58% (460146/593125 zeros)




Validation Accuracy after linear pruning: 69.74%

Fine-tuning after linear layer pruning (20 epochs)...




  Epoch 5/20: Train Acc: 45.73%, Val Acc: 70.57%




  Epoch 10/20: Train Acc: 46.08%, Val Acc: 70.46%




  Epoch 15/20: Train Acc: 46.64%, Val Acc: 70.69%




  Epoch 20/20: Train Acc: 46.31%, Val Acc: 68.79%





After linear pruning + fine-tuning:
Validation Accuracy: 68.79%
Sparsity: 77.58% (460146/593125 zeros)


In [14]:
# STEP 3: Enhanced Final Fine-tuning
print("\n" + "="*60)
print("STEP 3: Enhanced Final Fine-tuning")
print("="*60)

num_finetune_epochs = 60
optimizer = torch.optim.Adam(model_bn.parameters(), lr=0.0005, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_finetune_epochs)

# Ensure masks are up-to-date
masks = {}
for name, param in model_bn.named_parameters():
    masks[name] = (param.data != 0).float().to(device)

best_val_accuracy = 0
best_final_model = None

for epoch in range(num_finetune_epochs):
    print(f"Final Fine-tune Epoch {epoch+1}/{num_finetune_epochs}")

    train_loss, train_accuracy = train_one_epoch_with_bn_reg(
        model_bn, train_loader, optimizer, criterion, device, l1_lambda=0
    )

    with torch.no_grad():
        for name, param in model_bn.named_parameters():
            param.data.mul_(masks[name])

    val_loss, val_accuracy = validate(model_bn, val_loader, criterion, device)

    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, '
          f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%, '
          f'LR: {scheduler.get_last_lr()[0]:.6f}')

    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        best_final_model = {k: v.cpu().clone() for k, v in model_bn.state_dict().items()}
        print(f"  *** New best validation accuracy: {best_val_accuracy:.2f}% ***")

    scheduler.step()

# Load best model from final fine-tuning
if best_final_model:
    print(f"\nLoading best model from final fine-tuning: {best_val_accuracy:.2f}%")
    model_bn.load_state_dict({k: v.to(device) for k, v in best_final_model.items()})

print(f"\nBest validation accuracy during fine-tuning: {best_val_accuracy:.2f}%")


STEP 3: Enhanced Final Fine-tuning
Final Fine-tune Epoch 1/60




Train Loss: 1.2401, Train Acc: 45.85%, Val Loss: 0.7985, Val Acc: 71.01%, LR: 0.000500
  *** New best validation accuracy: 71.01% ***
Final Fine-tune Epoch 2/60




Train Loss: 1.2483, Train Acc: 45.35%, Val Loss: 0.8326, Val Acc: 67.76%, LR: 0.000500
Final Fine-tune Epoch 3/60




Train Loss: 1.2462, Train Acc: 45.14%, Val Loss: 0.8105, Val Acc: 69.94%, LR: 0.000499
Final Fine-tune Epoch 4/60




Train Loss: 1.2405, Train Acc: 45.64%, Val Loss: 0.8702, Val Acc: 66.30%, LR: 0.000497
Final Fine-tune Epoch 5/60




Train Loss: 1.2403, Train Acc: 45.74%, Val Loss: 0.8193, Val Acc: 69.03%, LR: 0.000495
Final Fine-tune Epoch 6/60




Train Loss: 1.2422, Train Acc: 45.21%, Val Loss: 0.7895, Val Acc: 70.53%, LR: 0.000491
Final Fine-tune Epoch 7/60




Train Loss: 1.2371, Train Acc: 45.49%, Val Loss: 0.9067, Val Acc: 65.31%, LR: 0.000488
Final Fine-tune Epoch 8/60




Train Loss: 1.2415, Train Acc: 45.37%, Val Loss: 0.8131, Val Acc: 70.02%, LR: 0.000483
Final Fine-tune Epoch 9/60




Train Loss: 1.2371, Train Acc: 45.58%, Val Loss: 0.8369, Val Acc: 67.76%, LR: 0.000478
Final Fine-tune Epoch 10/60




Train Loss: 1.2418, Train Acc: 45.58%, Val Loss: 0.8101, Val Acc: 68.51%, LR: 0.000473
Final Fine-tune Epoch 11/60




Train Loss: 1.2342, Train Acc: 46.02%, Val Loss: 0.8315, Val Acc: 68.12%, LR: 0.000467
Final Fine-tune Epoch 12/60




Train Loss: 1.2398, Train Acc: 45.01%, Val Loss: 0.8516, Val Acc: 67.33%, LR: 0.000460
Final Fine-tune Epoch 13/60




Train Loss: 1.2348, Train Acc: 45.76%, Val Loss: 0.7931, Val Acc: 69.90%, LR: 0.000452
Final Fine-tune Epoch 14/60




Train Loss: 1.2387, Train Acc: 45.58%, Val Loss: 0.8048, Val Acc: 69.66%, LR: 0.000444
Final Fine-tune Epoch 15/60




Train Loss: 1.2384, Train Acc: 45.88%, Val Loss: 0.8167, Val Acc: 69.90%, LR: 0.000436
Final Fine-tune Epoch 16/60




Train Loss: 1.2394, Train Acc: 45.35%, Val Loss: 0.7997, Val Acc: 70.53%, LR: 0.000427
Final Fine-tune Epoch 17/60




Train Loss: 1.2343, Train Acc: 45.86%, Val Loss: 0.8480, Val Acc: 68.55%, LR: 0.000417
Final Fine-tune Epoch 18/60




Train Loss: 1.2302, Train Acc: 45.63%, Val Loss: 0.8774, Val Acc: 66.93%, LR: 0.000407
Final Fine-tune Epoch 19/60




Train Loss: 1.2344, Train Acc: 45.69%, Val Loss: 0.8318, Val Acc: 68.91%, LR: 0.000397
Final Fine-tune Epoch 20/60




Train Loss: 1.2318, Train Acc: 46.00%, Val Loss: 0.7908, Val Acc: 71.13%, LR: 0.000386
  *** New best validation accuracy: 71.13% ***
Final Fine-tune Epoch 21/60




Train Loss: 1.2260, Train Acc: 46.30%, Val Loss: 0.8119, Val Acc: 69.70%, LR: 0.000375
Final Fine-tune Epoch 22/60




Train Loss: 1.2263, Train Acc: 45.78%, Val Loss: 0.7919, Val Acc: 70.85%, LR: 0.000363
Final Fine-tune Epoch 23/60




Train Loss: 1.2268, Train Acc: 45.72%, Val Loss: 0.8346, Val Acc: 67.92%, LR: 0.000352
Final Fine-tune Epoch 24/60




Train Loss: 1.2305, Train Acc: 45.74%, Val Loss: 0.7880, Val Acc: 71.49%, LR: 0.000340
  *** New best validation accuracy: 71.49% ***
Final Fine-tune Epoch 25/60




Train Loss: 1.2221, Train Acc: 46.52%, Val Loss: 0.7878, Val Acc: 70.38%, LR: 0.000327
Final Fine-tune Epoch 26/60




Train Loss: 1.2189, Train Acc: 46.66%, Val Loss: 0.8476, Val Acc: 67.64%, LR: 0.000315
Final Fine-tune Epoch 27/60




Train Loss: 1.2272, Train Acc: 46.18%, Val Loss: 0.8209, Val Acc: 69.35%, LR: 0.000302
Final Fine-tune Epoch 28/60




Train Loss: 1.2328, Train Acc: 45.74%, Val Loss: 0.7720, Val Acc: 71.72%, LR: 0.000289
  *** New best validation accuracy: 71.72% ***
Final Fine-tune Epoch 29/60




Train Loss: 1.2135, Train Acc: 46.91%, Val Loss: 0.7722, Val Acc: 70.93%, LR: 0.000276
Final Fine-tune Epoch 30/60




Train Loss: 1.2211, Train Acc: 46.62%, Val Loss: 0.8022, Val Acc: 71.13%, LR: 0.000263
Final Fine-tune Epoch 31/60




Train Loss: 1.2102, Train Acc: 46.88%, Val Loss: 0.7663, Val Acc: 71.60%, LR: 0.000250
Final Fine-tune Epoch 32/60




Train Loss: 1.2173, Train Acc: 46.37%, Val Loss: 0.7615, Val Acc: 71.92%, LR: 0.000237
  *** New best validation accuracy: 71.92% ***
Final Fine-tune Epoch 33/60




Train Loss: 1.2210, Train Acc: 46.42%, Val Loss: 0.7916, Val Acc: 70.77%, LR: 0.000224
Final Fine-tune Epoch 34/60




Train Loss: 1.2133, Train Acc: 46.82%, Val Loss: 0.7663, Val Acc: 71.64%, LR: 0.000211
Final Fine-tune Epoch 35/60




Train Loss: 1.2056, Train Acc: 47.10%, Val Loss: 0.7706, Val Acc: 71.17%, LR: 0.000198
Final Fine-tune Epoch 36/60




Train Loss: 1.2147, Train Acc: 46.66%, Val Loss: 0.7681, Val Acc: 72.04%, LR: 0.000185
  *** New best validation accuracy: 72.04% ***
Final Fine-tune Epoch 37/60




Train Loss: 1.2055, Train Acc: 47.31%, Val Loss: 0.7997, Val Acc: 69.62%, LR: 0.000173
Final Fine-tune Epoch 38/60




Train Loss: 1.2063, Train Acc: 47.50%, Val Loss: 0.7791, Val Acc: 71.01%, LR: 0.000160
Final Fine-tune Epoch 39/60




Train Loss: 1.2092, Train Acc: 46.83%, Val Loss: 0.7651, Val Acc: 70.89%, LR: 0.000148
Final Fine-tune Epoch 40/60




Train Loss: 1.2105, Train Acc: 47.04%, Val Loss: 0.7696, Val Acc: 71.29%, LR: 0.000137
Final Fine-tune Epoch 41/60




Train Loss: 1.2000, Train Acc: 47.39%, Val Loss: 0.7797, Val Acc: 71.76%, LR: 0.000125
Final Fine-tune Epoch 42/60




Train Loss: 1.2029, Train Acc: 47.30%, Val Loss: 0.7935, Val Acc: 70.89%, LR: 0.000114
Final Fine-tune Epoch 43/60




Train Loss: 1.1982, Train Acc: 47.64%, Val Loss: 0.7575, Val Acc: 72.48%, LR: 0.000103
  *** New best validation accuracy: 72.48% ***
Final Fine-tune Epoch 44/60




Train Loss: 1.1995, Train Acc: 47.60%, Val Loss: 0.7637, Val Acc: 71.37%, LR: 0.000093
Final Fine-tune Epoch 45/60




Train Loss: 1.2038, Train Acc: 47.36%, Val Loss: 0.7577, Val Acc: 71.92%, LR: 0.000083
Final Fine-tune Epoch 46/60




Train Loss: 1.1923, Train Acc: 47.88%, Val Loss: 0.7734, Val Acc: 71.25%, LR: 0.000073
Final Fine-tune Epoch 47/60




Train Loss: 1.1974, Train Acc: 47.24%, Val Loss: 0.7480, Val Acc: 72.63%, LR: 0.000064
  *** New best validation accuracy: 72.63% ***
Final Fine-tune Epoch 48/60




Train Loss: 1.2011, Train Acc: 47.21%, Val Loss: 0.7559, Val Acc: 72.08%, LR: 0.000056
Final Fine-tune Epoch 49/60




Train Loss: 1.1953, Train Acc: 47.77%, Val Loss: 0.7465, Val Acc: 72.40%, LR: 0.000048
Final Fine-tune Epoch 50/60




Train Loss: 1.1960, Train Acc: 47.37%, Val Loss: 0.7448, Val Acc: 72.95%, LR: 0.000040
  *** New best validation accuracy: 72.95% ***
Final Fine-tune Epoch 51/60




Train Loss: 1.1952, Train Acc: 47.83%, Val Loss: 0.7568, Val Acc: 72.28%, LR: 0.000033
Final Fine-tune Epoch 52/60




Train Loss: 1.1916, Train Acc: 47.81%, Val Loss: 0.7574, Val Acc: 72.51%, LR: 0.000027
Final Fine-tune Epoch 53/60




Train Loss: 1.1926, Train Acc: 47.60%, Val Loss: 0.7513, Val Acc: 72.32%, LR: 0.000022
Final Fine-tune Epoch 54/60




Train Loss: 1.1925, Train Acc: 47.87%, Val Loss: 0.7544, Val Acc: 72.55%, LR: 0.000017
Final Fine-tune Epoch 55/60




Train Loss: 1.1844, Train Acc: 48.44%, Val Loss: 0.7472, Val Acc: 73.07%, LR: 0.000012
  *** New best validation accuracy: 73.07% ***
Final Fine-tune Epoch 56/60




Train Loss: 1.1885, Train Acc: 48.36%, Val Loss: 0.7449, Val Acc: 72.40%, LR: 0.000009
Final Fine-tune Epoch 57/60




Train Loss: 1.1906, Train Acc: 47.84%, Val Loss: 0.7454, Val Acc: 72.79%, LR: 0.000005
Final Fine-tune Epoch 58/60




Train Loss: 1.2007, Train Acc: 47.54%, Val Loss: 0.7564, Val Acc: 72.28%, LR: 0.000003
Final Fine-tune Epoch 59/60




Train Loss: 1.1869, Train Acc: 47.72%, Val Loss: 0.7593, Val Acc: 72.40%, LR: 0.000001
Final Fine-tune Epoch 60/60


                                                                                        

Train Loss: 1.1902, Train Acc: 48.31%, Val Loss: 0.7465, Val Acc: 72.75%, LR: 0.000000

Loading best model from final fine-tuning: 73.07%

Best validation accuracy during fine-tuning: 73.07%




In [15]:
# Transfer weights from BN model to original architecture
print("\n" + "="*60)
print("Transferring weights to original architecture")
print("="*60)

# Define original ConvNet WITHOUT BatchNorm
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1, bias=True),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=0, bias=True),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(0.25),
            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=True),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=0, bias=True),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(0.25),
            nn.Flatten(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 5),
        )
    def forward(self, x):
        return self.model(x)

original_model = ConvNet().to(device)

bn_conv_layers = []
bn_bn_layers = []
for module in model_bn.modules():
    if isinstance(module, nn.Conv2d):
        bn_conv_layers.append(module)
    elif isinstance(module, nn.BatchNorm2d):
        bn_bn_layers.append(module)

orig_conv_layers = []
for module in original_model.modules():
    if isinstance(module, nn.Conv2d):
        orig_conv_layers.append(module)

with torch.no_grad():
    for i, (orig_conv, bn_conv, bn) in enumerate(zip(orig_conv_layers, bn_conv_layers, bn_bn_layers)):
        orig_conv.weight.data = bn_conv.weight.data.clone()

        gamma = bn.weight.data
        beta = bn.bias.data
        mean = bn.running_mean
        var = bn.running_var
        eps = bn.eps

        std = torch.sqrt(var + eps)
        bn_weight_normalized = gamma / std
        bn_bias_normalized = beta - gamma * mean / std

        for j in range(orig_conv.weight.data.shape[0]):
            orig_conv.weight.data[j] = orig_conv.weight.data[j] * bn_weight_normalized[j]

        orig_conv.bias.data = bn_bias_normalized.clone()

    bn_linear_layers = [m for m in model_bn.modules() if isinstance(m, nn.Linear)]
    orig_linear_layers = [m for m in original_model.modules() if isinstance(m, nn.Linear)]

    for bn_linear, orig_linear in zip(bn_linear_layers, orig_linear_layers):
        orig_linear.weight.data = bn_linear.weight.data.clone()
        orig_linear.bias.data = bn_linear.bias.data.clone()

print("Weights transferred successfully!")

val_loss, val_accuracy = validate(original_model, val_loader, criterion, device)
sparsity, num_zero, total_params = calculate_sparsity(original_model)

print(f"\nOriginal architecture with pruned weights:")
print(f"Validation Accuracy: {val_accuracy:.2f}%")
print(f"Sparsity: {100*sparsity:.2f}% ({num_zero}/{total_params} zeros)")

accuracy = val_accuracy / 100
if accuracy > 0.6 and sparsity > 0:
    score = (accuracy + sparsity) / 2
else:
    score = 0
print(f"FINAL SCORE: {score:.4f}")

torch.save(original_model.state_dict(), 'pruned_model_for_autograder.pt', _use_new_zipfile_serialization=False)
print("\nSaved pruned model for autograder: pruned_model_for_autograder.pt")


Transferring weights to original architecture
Weights transferred successfully!


                                                                                        


Original architecture with pruned weights:
Validation Accuracy: 73.07%
Sparsity: 77.58% (460021/592933 zeros)
FINAL SCORE: 0.7533

Saved pruned model for autograder: pruned_model_for_autograder.pt




In [16]:
torch.save(original_model.state_dict(), 'my_model_weights_1.pt', _use_new_zipfile_serialization=False)

In [17]:
torch.save(original_model.state_dict(), 'my_model_weights_2.pt', _use_new_zipfile_serialization=False)

In [18]:
torch.save(original_model.state_dict(), 'my_model_weights_3.pt', _use_new_zipfile_serialization=False)

In [19]:
import glob
import zipfile
import os

pt_files = glob.glob('*.pt')
zip_file_name = 'my_model_weights.zip'

with zipfile.ZipFile(zip_file_name, 'w') as zipf:
    for file in pt_files:
        zipf.write(file, os.path.basename(file))

print(f"Successfully created {zip_file_name} containing: {pt_files}")

Successfully created my_model_weights.zip containing: ['pruned_model_for_autograder.pt', 'my_model_weights_3.pt', 'my_model_weights_2.pt', 'my_model_weights_1.pt']


In [20]:
!ls -lh my_model_weights.zip

-rw-r--r-- 1 root root 9.1M Dec  1 15:14 my_model_weights.zip
