In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import models, datasets, transforms
import numpy as np
from tqdm import tqdm

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Convert each image to a tensor and normalise the three colour channels with
# the per-channel mean / std constants used for CIFAR-10 in this notebook.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Load the CIFAR-10 dataset (downloaded into ./data on first use)
full_train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split the full training dataset into training and validation sets (80% train, 20% validation).
# The split is driven by a dedicated, seeded generator so the same images end up
# in the validation set on every run; otherwise metrics from different runs
# (e.g. before vs. after pruning) are measured on different data.
SPLIT_SEED = 42
train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create DataLoaders for the train, validation, and test sets.
# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4, pin_memory=True)

# Print dataset sizes for debugging
print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")
print(f"Test set size: {len(test_dataset)}")


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified
Training set size: 40000
Validation set size: 10000
Test set size: 10000




In [None]:
# Start from the pretrained VGG16 and swap its last classifier layer for a
# 10-way output, since CIFAR-10 has 10 classes.
model = models.vgg16(pretrained=True)
input_lastLayer = model.classifier[6].in_features
model.classifier[6] = nn.Linear(in_features=input_lastLayer, out_features=10)
model = model.to(device)

# Loss and optimiser used for fine-tuning: cross-entropy with SGD
# (momentum plus weight decay) over every parameter of the model.
learning_rate = 0.01
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=5e-4,
)



## Train the unpruned baseline model

In [None]:
# Training loop: fine-tune the unpruned model and validate after every epoch.
# Losses are accumulated per sample (not per batch) so that a shorter final
# batch does not get the same weight as a full one in the epoch average.
epochs = 5
for epoch in range(epochs):
    # Training phase
    model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    correct_train = 0
    total_train = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} - Training"):
        images, labels = images.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # `criterion` is nn.CrossEntropyLoss() with its default (mean) reduction,
        # so multiplying by the batch size recovers the summed loss of the batch.
        n_batch_samples = labels.size(0)
        running_loss += loss.item() * n_batch_samples

        # Calculate training accuracy (argmax over the class dimension)
        predicted = outputs.argmax(dim=1)
        total_train += n_batch_samples
        correct_train += (predicted == labels).sum().item()

    train_loss = running_loss / total_train
    train_accuracy = 100 * correct_train / total_train

    # Validation phase
    model.eval()
    running_val_loss = 0.0  # sum of per-sample validation losses
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} - Validation"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            n_batch_samples = labels.size(0)
            running_val_loss += loss.item() * n_batch_samples

            # Calculate validation accuracy
            predicted = outputs.argmax(dim=1)
            total_val += n_batch_samples
            correct_val += (predicted == labels).sum().item()

    val_loss = running_val_loss / total_val
    val_accuracy = 100 * correct_val / total_val

    print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")
    print(f"Epoch [{epoch+1}/{epochs}], Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")



Epoch 1/5 - Training: 100%|██████████| 625/625 [00:44<00:00, 13.99it/s]
Epoch 1/5 - Validation: 100%|██████████| 157/157 [00:04<00:00, 35.67it/s]


Epoch [1/5], Train Loss: 0.8928, Train Accuracy: 70.59%
Epoch [1/5], Val Loss: 0.7277, Val Accuracy: 76.26%


Epoch 2/5 - Training: 100%|██████████| 625/625 [00:43<00:00, 14.39it/s]
Epoch 2/5 - Validation: 100%|██████████| 157/157 [00:03<00:00, 43.33it/s]


Epoch [2/5], Train Loss: 0.5022, Train Accuracy: 83.81%
Epoch [2/5], Val Loss: 0.5125, Val Accuracy: 83.54%


Epoch 3/5 - Training: 100%|██████████| 625/625 [00:43<00:00, 14.31it/s]
Epoch 3/5 - Validation: 100%|██████████| 157/157 [00:04<00:00, 32.40it/s]


Epoch [3/5], Train Loss: 0.3600, Train Accuracy: 88.33%
Epoch [3/5], Val Loss: 0.5866, Val Accuracy: 80.36%


Epoch 4/5 - Training: 100%|██████████| 625/625 [00:43<00:00, 14.42it/s]
Epoch 4/5 - Validation: 100%|██████████| 157/157 [00:03<00:00, 42.72it/s]


Epoch [4/5], Train Loss: 0.2682, Train Accuracy: 91.24%
Epoch [4/5], Val Loss: 0.5240, Val Accuracy: 83.75%


Epoch 5/5 - Training: 100%|██████████| 625/625 [00:43<00:00, 14.37it/s]
Epoch 5/5 - Validation: 100%|██████████| 157/157 [00:04<00:00, 36.32it/s]

Epoch [5/5], Train Loss: 0.2207, Train Accuracy: 92.72%
Epoch [5/5], Val Loss: 0.4619, Val Accuracy: 85.78%





In [None]:
# Measure top-1 accuracy on the held-out test split.
# Eval mode plus no_grad: nothing is learned or tracked during this pass.
model.eval()
correct_test, total_test = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit in each row
        predicted = outputs.argmax(dim=1)
        correct_test += (predicted == labels).sum().item()
        total_test += labels.size(0)

test_accuracy = 100 * correct_test / total_test
print(f'Accuracy of the model on the test set: {test_accuracy:.2f}%')

Accuracy of the model on the test set: 85.40%


In [None]:
# Keep an independent snapshot of the trained, unpruned network.
# A plain `first_model = model` would only create a second name for the same
# object, so the pruning applied to `model` further down would silently change
# the "backup" as well. deepcopy gives the snapshot its own parameters.
import copy

first_model = copy.deepcopy(model)
# To roll back to the unpruned weights later:
#model = copy.deepcopy(first_model)

In [None]:
def calculate_sparse_flops(model, input_size=(3, 32, 32)):
    """Count the multiply-accumulates of one forward pass, skipping zeroed weights.

    One dummy sample of shape ``input_size`` is pushed through ``model`` while a
    forward hook on every ``nn.Conv2d`` / ``nn.Linear`` records that layer's real
    output shape. The cost of a layer is then

    * Conv2d: (non-zero weights) x (output height x output width). The weight
      tensor already spans out_channels x in_channels x kH x kW, so the kernel
      area must not be multiplied in a second time.
    * Linear: (non-zero weights) x (number of rows the layer is applied to).

    Using the recorded output shapes (instead of assuming every layer sees the
    network input resolution) accounts for stride, padding and pooling.

    Args:
        model: network to analyse. It is switched to eval mode and left there.
        input_size: shape of a single input sample, without the batch dimension.

    Returns:
        int: multiply-accumulate operations contributed by non-zero Conv2d and
        Linear weights. Biases and all other layer types are not counted.
    """
    layer_costs = []

    def _record_cost(module, inputs, output):
        # For a pruned layer `module.weight` is the masked weight, so pruned
        # entries are exact zeros and drop out of the count.
        non_zero_weights = torch.count_nonzero(module.weight).item()
        if isinstance(module, nn.Conv2d):
            applications = output.shape[-2] * output.shape[-1]
        else:
            # Batch size is 1, so this is the number of input rows per sample.
            applications = output.numel() // module.out_features
        layer_costs.append(non_zero_weights * applications)

    handles = [
        module.register_forward_hook(_record_cost)
        for module in model.modules()
        if isinstance(module, (nn.Conv2d, nn.Linear))
    ]

    model.eval()
    # Build the dummy sample on the same device / dtype as the model's parameters.
    reference = next(model.parameters(), None)
    if reference is None:
        dummy = torch.zeros(1, *input_size)
    else:
        dummy = torch.zeros(1, *input_size, device=reference.device, dtype=reference.dtype)

    try:
        with torch.no_grad():
            model(dummy)
    finally:
        # Always detach the hooks so the model is left unmodified for training.
        for handle in handles:
            handle.remove()

    return sum(layer_costs)

# Baseline measurement: no pruning has been applied to `first_model` at this
# point, so the label says "before pruning" to keep it distinguishable from the
# post-pruning figure reported later in the notebook.
flops = calculate_sparse_flops(first_model)
print(f"FLOPs before pruning (dense baseline): {flops / 1e9:.2f} GFLOPs")

FLOPs after pruning considering sparsity: 135.93 GFLOPs


## PRUNE STEP

In [None]:
# List every learnable tensor of the model by name together with its shape
# (only the shapes are printed here, not the values).
for name, param in model.named_parameters():
    print(f"Layer: {name} | Shape: {param.shape}")

Layer: features.0.weight | Shape: torch.Size([64, 3, 3, 3])
Layer: features.0.bias | Shape: torch.Size([64])
Layer: features.2.weight | Shape: torch.Size([64, 64, 3, 3])
Layer: features.2.bias | Shape: torch.Size([64])
Layer: features.5.weight | Shape: torch.Size([128, 64, 3, 3])
Layer: features.5.bias | Shape: torch.Size([128])
Layer: features.7.weight | Shape: torch.Size([128, 128, 3, 3])
Layer: features.7.bias | Shape: torch.Size([128])
Layer: features.10.weight | Shape: torch.Size([256, 128, 3, 3])
Layer: features.10.bias | Shape: torch.Size([256])
Layer: features.12.weight | Shape: torch.Size([256, 256, 3, 3])
Layer: features.12.bias | Shape: torch.Size([256])
Layer: features.14.weight | Shape: torch.Size([256, 256, 3, 3])
Layer: features.14.bias | Shape: torch.Size([256])
Layer: features.17.weight | Shape: torch.Size([512, 256, 3, 3])
Layer: features.17.bias | Shape: torch.Size([512])
Layer: features.19.weight | Shape: torch.Size([512, 512, 3, 3])
Layer: features.19.bias | Shape:

In [None]:
# Dump the weight tensors stored at positions 0, 2 and 5 of `model.features`
# so they can be compared with the same tensors after pruning.
for layer_index in (0, 2, 5):
    print(model.features[layer_index].weight)

Parameter containing:
tensor([[[[-0.4877,  0.1529,  0.4783],
          [-0.5063,  0.3665,  0.7135],
          [-0.6076, -0.0277,  0.4539]],

         [[ 0.1705,  0.0229, -0.0789],
          [ 0.0487, -0.0504, -0.2444],
          [ 0.1165, -0.1675, -0.1258]],

         [[ 0.3344, -0.1313, -0.4129],
          [ 0.4744, -0.0623, -0.4787],
          [ 0.5887,  0.0096, -0.2661]]],


        [[[ 0.2842,  0.1195,  0.1251],
          [-0.3434, -0.2462,  0.1632],
          [-0.1853,  0.1178, -0.0922]],

         [[-0.0602, -0.2022,  0.0816],
          [-0.7162, -0.3504,  0.4379],
          [-0.1919,  0.4378,  0.3846]],

         [[-0.2091, -0.3171, -0.1297],
          [-0.3927, -0.1523,  0.2905],
          [ 0.0789,  0.5353,  0.3963]]],


        [[[ 0.1385,  0.4646, -0.0523],
          [-0.2042, -0.6723,  0.3179],
          [-0.0037, -0.1952,  0.3273]],

         [[ 0.2445,  0.5725, -0.0353],
          [-0.3822, -0.9976,  0.3337],
          [-0.0029, -0.2721,  0.5006]],

         [[ 0.2705,  0

In [None]:
# Optional full dump of every parameter tensor (disabled: very verbose)
# for name, param in model.named_parameters():
#     print(f"Layer: {name} | Params: {param}")

# Sanity check before pruning: list the model's registered buffers.
# Nothing is expected to be printed by this loop at this stage.
for name, buffer in model.named_buffers():
    print(f"Layer: {name} | Buffers: {buffer}")

# NOTE(review): this inspects the hooks of the top-level module only; hooks
# registered on individual sub-layers would not show up here — confirm that
# this is the check that was intended.
print(model._forward_pre_hooks) # expected to be an empty OrderedDict

OrderedDict()


In [None]:
from torch.nn.utils.prune import l1_unstructured

# The 10-class head was already installed and fine-tuned in the cells above.
# Re-creating it unconditionally would throw those trained weights away, and
# the fresh layer would not be known to the optimizer that was built earlier.
# Only install a new head when the model does not have a 10-way output yet
# (e.g. after reloading a stock VGG16 with the line below).
# model = models.vgg16(pretrained=True)
input_lastLayer = model.classifier[6].in_features
if model.classifier[6].out_features != 10:
    # CIFAR-10 has 10 classes. Any optimizer created before this point must be
    # re-created afterwards so that it also updates the new layer.
    model.classifier[6] = nn.Linear(input_lastLayer, 10)

In [None]:
import torch.nn.utils.prune as prune

# Fraction of weights to zero out in every layer: 0.9 keeps only the 10% of
# weights with the largest absolute value in each layer.
PRUNE_AMOUNT = 0.9

# Apply L1 unstructured pruning to every convolutional layer (the 'features'
# part) and every fully connected layer (the 'classifier' part).
# Layers that are already pruned are skipped: pruning the same layer again
# removes PRUNE_AMOUNT of the *remaining* weights, so re-running this cell
# would otherwise keep increasing the sparsity.
for name, module in model.named_modules():
    if isinstance(module, (nn.Conv2d, nn.Linear)) and not prune.is_pruned(module):
        prune.l1_unstructured(module, name="weight", amount=PRUNE_AMOUNT)

In [None]:
# Apply L1 unstructured pruning to all fully connected layers in the 'classifier' part.
# A layer that already carries a pruning mask is left alone: pruning it a second
# time would remove 90% of the weights that survived the first pass instead of
# keeping the sparsity at 90%.
for name, module in model.named_modules():
    if isinstance(module, nn.Linear) and not prune.is_pruned(module):
        prune.l1_unstructured(module, name="weight", amount=0.9)  # zero the 90% smallest-magnitude weights

In [None]:
# Inspect the pruning mask attached to the layer at model.features[0]
print(f"Mask for 'features.0.weight':")
print(model.features[0].weight_mask)  # entries are 0. or 1. in the printed output; 0 marks a pruned weight

Mask for 'features.0.weight':
tensor([[[[1., 0., 1.],
          [1., 1., 1.],
          [1., 0., 1.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 1.],
          [1., 0., 1.],
          [1., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [1., 0., 1.],
          [0., 1., 1.]],

         [[0., 0., 0.],
          [1., 0., 0.],
          [0., 1., 1.]]],


        [[[0., 1., 0.],
          [0., 1., 0.],
          [0., 0., 0.]],

         [[0., 1., 0.],
          [1., 1., 0.],
          [0., 0., 1.]],

         [[0., 1., 1.],
          [0., 1., 0.],
          [0., 0., 0.]]],


        ...,


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [

In [None]:
# Sparsity check for the same layer: share of entries that are exactly zero.
first_conv_weight = model.features[0].weight
zero_count = torch.sum(first_conv_weight == 0).item()
print(f"Sparsity of 'features.0.weight':")
print(zero_count / first_conv_weight.nelement())  # fraction of weights equal to zero

Sparsity of 'features.0.weight':
0.8998842592592593


In [None]:
# Send the model to `device` again before fine-tuning.
# Presumably this makes sure tensors created by the pruning cells live on the
# same device as the data — TODO confirm it is still required.
model = model.to(device)

In [None]:
# Fine-tune the pruned model and validate after every epoch.

# Re-create the optimizer from the model's *current* parameters. The one built
# right after the model was first created only knows the tensors that existed
# then; a layer replaced since (such as a re-created classifier head) would
# never be updated by it. Hyper-parameters match the original optimizer.
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)

# Training loop. Losses are accumulated per sample (not per batch) so that a
# shorter final batch does not get the same weight as a full one.
epochs = 10
for epoch in range(epochs):
    # Training phase
    model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    correct_train = 0
    total_train = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} - Training"):
        images, labels = images.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # `criterion` is nn.CrossEntropyLoss() with its default (mean) reduction,
        # so multiplying by the batch size recovers the summed loss of the batch.
        n_batch_samples = labels.size(0)
        running_loss += loss.item() * n_batch_samples

        # Calculate training accuracy (argmax over the class dimension)
        predicted = outputs.argmax(dim=1)
        total_train += n_batch_samples
        correct_train += (predicted == labels).sum().item()

    train_loss = running_loss / total_train
    train_accuracy = 100 * correct_train / total_train

    # Validation phase
    model.eval()
    running_val_loss = 0.0  # sum of per-sample validation losses
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} - Validation"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            n_batch_samples = labels.size(0)
            running_val_loss += loss.item() * n_batch_samples

            # Calculate validation accuracy
            predicted = outputs.argmax(dim=1)
            total_val += n_batch_samples
            correct_val += (predicted == labels).sum().item()

    val_loss = running_val_loss / total_val
    val_accuracy = 100 * correct_val / total_val

    print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")
    print(f"Epoch [{epoch+1}/{epochs}], Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")



Epoch 1/10 - Training: 100%|██████████| 625/625 [00:51<00:00, 12.11it/s]
Epoch 1/10 - Validation: 100%|██████████| 157/157 [00:04<00:00, 35.59it/s]


Epoch [1/10], Train Loss: 0.2336, Train Accuracy: 92.20%
Epoch [1/10], Val Loss: 0.4632, Val Accuracy: 85.48%


Epoch 2/10 - Training: 100%|██████████| 625/625 [00:51<00:00, 12.17it/s]
Epoch 2/10 - Validation: 100%|██████████| 157/157 [00:04<00:00, 36.09it/s]


Epoch [2/10], Train Loss: 0.1877, Train Accuracy: 93.83%
Epoch [2/10], Val Loss: 0.4376, Val Accuracy: 86.18%


Epoch 3/10 - Training: 100%|██████████| 625/625 [00:51<00:00, 12.11it/s]
Epoch 3/10 - Validation: 100%|██████████| 157/157 [00:04<00:00, 36.68it/s]


Epoch [3/10], Train Loss: 0.1546, Train Accuracy: 94.76%
Epoch [3/10], Val Loss: 0.4479, Val Accuracy: 86.49%


Epoch 4/10 - Training: 100%|██████████| 625/625 [00:51<00:00, 12.08it/s]
Epoch 4/10 - Validation: 100%|██████████| 157/157 [00:05<00:00, 30.56it/s]


Epoch [4/10], Train Loss: 0.1272, Train Accuracy: 95.81%
Epoch [4/10], Val Loss: 0.5121, Val Accuracy: 85.63%


Epoch 5/10 - Training: 100%|██████████| 625/625 [00:51<00:00, 12.13it/s]
Epoch 5/10 - Validation:  90%|████████▉ | 141/157 [00:03<00:00, 38.24it/s]

In [None]:
# Measure top-1 accuracy of the pruned, fine-tuned model on the test split.
# Eval mode plus no_grad: nothing is learned or tracked during this pass.
model.eval()
correct_test, total_test = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit in each row
        predicted = outputs.argmax(dim=1)
        correct_test += (predicted == labels).sum().item()
        total_test += labels.size(0)

test_accuracy = 100 * correct_test / total_test
print(f'Accuracy of the model on the test set: {test_accuracy:.2f}%')


Accuracy of the model on the test set: 86.37%


In [None]:
def calculate_sparse_flops(model, input_size=(3, 32, 32)):
    """Count the multiply-accumulates of one forward pass, skipping zeroed weights.

    One dummy sample of shape ``input_size`` is pushed through ``model`` while a
    forward hook on every ``nn.Conv2d`` / ``nn.Linear`` records that layer's real
    output shape. The cost of a layer is then

    * Conv2d: (non-zero weights) x (output height x output width). The weight
      tensor already spans out_channels x in_channels x kH x kW, so the kernel
      area must not be multiplied in a second time.
    * Linear: (non-zero weights) x (number of rows the layer is applied to).

    Using the recorded output shapes (instead of assuming every layer sees the
    network input resolution) accounts for stride, padding and pooling.

    Args:
        model: network to analyse. It is switched to eval mode and left there.
        input_size: shape of a single input sample, without the batch dimension.

    Returns:
        int: multiply-accumulate operations contributed by non-zero Conv2d and
        Linear weights. Biases and all other layer types are not counted.
    """
    layer_costs = []

    def _record_cost(module, inputs, output):
        # For a pruned layer `module.weight` is the masked weight, so pruned
        # entries are exact zeros and drop out of the count.
        non_zero_weights = torch.count_nonzero(module.weight).item()
        if isinstance(module, nn.Conv2d):
            applications = output.shape[-2] * output.shape[-1]
        else:
            # Batch size is 1, so this is the number of input rows per sample.
            applications = output.numel() // module.out_features
        layer_costs.append(non_zero_weights * applications)

    handles = [
        module.register_forward_hook(_record_cost)
        for module in model.modules()
        if isinstance(module, (nn.Conv2d, nn.Linear))
    ]

    model.eval()
    # Build the dummy sample on the same device / dtype as the model's parameters.
    reference = next(model.parameters(), None)
    if reference is None:
        dummy = torch.zeros(1, *input_size)
    else:
        dummy = torch.zeros(1, *input_size, device=reference.device, dtype=reference.dtype)

    try:
        with torch.no_grad():
            model(dummy)
    finally:
        # Always detach the hooks so the model is left unmodified for training.
        for handle in handles:
            handle.remove()

    return sum(layer_costs)

# Estimate the cost of `model` in its current (pruned and fine-tuned) state and
# report it in GFLOPs (raw count divided by 1e9).
flops = calculate_sparse_flops(model)
print(f"FLOPs after pruning considering sparsity: {flops / 1e9:.2f} GFLOPs")

FLOPs after pruning considering sparsity: 13.59 GFLOPs


In [None]:
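# Results log from earlier runs with different pruning amounts
# (presumably test accuracy in % after fine-tuning; confirm before citing):
# Pruning 20% : 87.07
# Pruning 40% : 86.16
# Pruning 75% : 86.11
# Pruning 95% : 70.92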

## RESNET


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, random_split
from torchvision import models, datasets, transforms
import time
import numpy as np
from tqdm import tqdm

In [None]:
# Evaluation pipeline: tensor conversion and per-channel normalisation only.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# Training pipeline: random flip, padded crop, rotation and colour jitter are
# applied to the image first, followed by the same tensor conversion and
# normalisation as the evaluation pipeline.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1,
    ),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

In [None]:
# Load CIFAR-10. The training images are wrapped twice: once with the
# augmenting pipeline (used for training) and once with the plain evaluation
# pipeline (used for validation). Splitting a single augmented dataset would
# make the validation subset inherit the random flips / crops / rotations /
# colour jitter, so validation accuracy would be measured on distorted images.
train_aug_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
train_eval_dataset = datasets.CIFAR10(root='./data', train=True, download=False, transform=transform_test)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Split the training data into training and validation sets (80% train, 20% validation).
# A seeded permutation keeps the split identical across runs, and both subsets
# index the same underlying images, so they never overlap.
SPLIT_SEED = 42
train_size = int(0.8 * len(train_aug_dataset))
val_size = len(train_aug_dataset) - train_size
split_indices = torch.randperm(
    len(train_aug_dataset),
    generator=torch.Generator().manual_seed(SPLIT_SEED),
).tolist()
train_dataset = torch.utils.data.Subset(train_aug_dataset, split_indices[:train_size])
val_dataset = torch.utils.data.Subset(train_eval_dataset, split_indices[train_size:])

# Create DataLoaders (only the training loader shuffles)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=4, pin_memory=True)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet-50 whose final fully connected layer is replaced by a
# 10-way classifier (CIFAR-10 has 10 classes); the layer's input width is read
# from the layer being replaced.
teacher_model = models.resnet50(pretrained=True)
teacher_model.fc = nn.Linear(teacher_model.fc.in_features, 10)
teacher_model.to(device)

# Cross-entropy loss, Adam with weight decay, and a step schedule that
# multiplies the learning rate by 0.1 every 10 scheduler steps.
criterion_ce = nn.CrossEntropyLoss()
optimizer_teacher = optim.Adam(
    teacher_model.parameters(),
    lr=0.001,
    weight_decay=1e-4,
)
scheduler = StepLR(optimizer_teacher, step_size=10, gamma=0.1)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 119MB/s]


### Prune the ResNet-50 convolutional layers

In [None]:
# Look at one convolution's weights before pruning, as a reference for the
# comparison made after the pruning cell.
inspected_block = teacher_model.layer1[2]
layer_weights = inspected_block.conv2.weight
print(layer_weights)

Parameter containing:
tensor([[[[ 3.3096e-02, -2.2837e-03, -2.6060e-02],
          [-1.8888e-03, -1.4137e-02, -7.5973e-03],
          [-4.9556e-02,  2.8547e-02, -1.2019e-02]],

         [[ 2.0762e-02, -6.0752e-02,  1.7711e-02],
          [ 2.2331e-02, -5.3168e-02,  2.7273e-02],
          [-1.8588e-02,  7.2604e-03, -4.4545e-03]],

         [[ 1.1293e-02,  4.2911e-03,  2.0791e-02],
          [-2.3401e-03, -1.7693e-02,  1.8178e-02],
          [-5.9822e-03, -9.6152e-03,  8.1386e-03]],

         ...,

         [[ 1.3661e-02, -1.2209e-02, -4.6657e-03],
          [-3.2961e-03, -3.7944e-03, -2.1736e-02],
          [-2.4662e-02,  2.3986e-02, -1.5785e-02]],

         [[-4.2326e-02,  5.5694e-03,  1.0661e-02],
          [-4.7409e-02,  3.7776e-02,  2.1256e-02],
          [-9.4837e-03,  1.1733e-02, -6.1717e-03]],

         [[ 3.1075e-03, -7.9047e-03, -2.5325e-02],
          [ 8.5359e-03, -6.7326e-03, -1.2789e-02],
          [ 7.2580e-03, -1.6795e-02, -7.6375e-03]]],


        [[[ 2.1006e-02, -1.9891

In [None]:
# Print weights for each layer in the model
# for name, param in teacher_model.named_parameters():
#     print(f"Layer: {name}, Weights: {param.data}")
# Access weights of conv1 within the first Bottleneck block in layer1

# first_block = teacher_model.layer1[0]  # Access the first Bottleneck block of layer1
# conv1_weights = first_block.conv1.weight.data  # Access conv1 weights within this block
# print("Weights of conv1 in the first block of layer1:")
# print(conv1_weights)

# second_block = teacher_model.layer1[1]  # Access the second Bottleneck block of layer1
# conv1_weights = second_block.conv1.weight.data  # Access conv1 weights within this block
# print("Weights of conv1 in the second block of layer1:")
# print(conv1_weights)

# # Print weights of conv1 in the first block of layer1
# for name, module in teacher_model.layer2.named_children():
#     if isinstance(module, nn.Conv2d):
#         print(f"Weights of conv layer {name} in layer1:")
#         print(module.weight.data)

In [None]:
import torch.nn.utils.prune as prune
# Define the sparsity level (fraction of weights to prune in each layer)
sparsity = 0.9  # 90% sparsity: only the 10% largest-magnitude weights are kept

# Prune the convolutional layers
for name, module in teacher_model.named_modules():
    # Skip layers that already carry a pruning mask: pruning the same layer again
    # removes `sparsity` of the *remaining* weights, so re-running this cell
    # would otherwise push the sparsity well beyond the configured level.
    if isinstance(module, nn.Conv2d) and not prune.is_pruned(module):
        # Apply L1 unstructured pruning to each Conv2d layer
        prune.l1_unstructured(module, name='weight', amount=sparsity)
        #print(f"Pruned {name} layer with {sparsity * 100}% sparsity")

In [None]:
# first_block = teacher_model.layer1[0]  # Access the first Bottleneck block of layer1
# conv1_weights = first_block.conv1.weight.data  # Access conv1 weights within this block
# print("Weights of conv1 in the first block of layer1:")
# print(conv1_weights)

# second_block = teacher_model.layer1[1]  # Access the second Bottleneck block of layer1
# conv1_weights = second_block.conv1.weight.data  # Access conv1 weights within this block
# print("Weights of conv1 in the second block of layer1:")
# print(conv1_weights)

In [None]:
# Look at the same convolution's weights again, now that pruning has been
# applied, to see which entries were zeroed.
inspected_block = teacher_model.layer1[2]
layer_weights = inspected_block.conv2.weight
print(layer_weights)

tensor([[[[ 0.0000, -0.0000, -0.0000],
          [-0.0000, -0.0000, -0.0000],
          [-0.0000,  0.0000, -0.0000]],

         [[ 0.0000, -0.0608,  0.0000],
          [ 0.0000, -0.0532,  0.0000],
          [-0.0000,  0.0000, -0.0000]],

         [[ 0.0000,  0.0000,  0.0000],
          [-0.0000, -0.0000,  0.0000],
          [-0.0000, -0.0000,  0.0000]],

         ...,

         [[ 0.0000, -0.0000, -0.0000],
          [-0.0000, -0.0000, -0.0000],
          [-0.0000,  0.0000, -0.0000]],

         [[-0.0000,  0.0000,  0.0000],
          [-0.0000,  0.0000,  0.0000],
          [-0.0000,  0.0000, -0.0000]],

         [[ 0.0000, -0.0000, -0.0000],
          [ 0.0000, -0.0000, -0.0000],
          [ 0.0000, -0.0000, -0.0000]]],


        [[[ 0.0000, -0.0000, -0.0000],
          [-0.0000,  0.0000, -0.0000],
          [ 0.0000,  0.0772, -0.0554]],

         [[-0.0000,  0.0679, -0.0000],
          [-0.0548,  0.0633, -0.0000],
          [-0.0000, -0.0000, -0.0000]],

         [[ 0.0000, -0.0000,  0

In [None]:
# Training loop for the pruned teacher model: train, validate, checkpoint the
# best validation accuracy, then step the learning-rate schedule once per epoch.
# Losses are accumulated per sample (not per batch) so that a shorter final
# batch does not get the same weight as a full one in the epoch average.
num_epochs_teacher = 30  # Increased epochs for better accuracy
best_val_accuracy = 0.0
start_time_teacher = time.time()

for epoch in range(num_epochs_teacher):
    # Training phase
    teacher_model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    correct_train = 0
    total_train = 0

    for inputs, labels in tqdm(train_loader, desc=f'Teacher Model Training Epoch [{epoch + 1}/{num_epochs_teacher}]'):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer_teacher.zero_grad()
        outputs = teacher_model(inputs)
        loss = criterion_ce(outputs, labels)
        loss.backward()
        optimizer_teacher.step()

        # `criterion_ce` is nn.CrossEntropyLoss() with its default (mean) reduction,
        # so multiplying by the batch size recovers the summed loss of the batch.
        n_batch_samples = labels.size(0)
        running_loss += loss.item() * n_batch_samples

        # Calculate training accuracy (argmax over the class dimension)
        predicted = outputs.argmax(dim=1)
        total_train += n_batch_samples
        correct_train += (predicted == labels).sum().item()

    train_loss = running_loss / total_train
    train_accuracy = 100 * correct_train / total_train

    # Validation phase
    teacher_model.eval()
    running_val_loss = 0.0  # sum of per-sample validation losses
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = teacher_model(inputs)
            loss = criterion_ce(outputs, labels)

            n_batch_samples = labels.size(0)
            running_val_loss += loss.item() * n_batch_samples

            # Calculate validation accuracy
            predicted = outputs.argmax(dim=1)
            total_val += n_batch_samples
            correct_val += (predicted == labels).sum().item()

    val_loss = running_val_loss / total_val
    val_accuracy = 100 * correct_val / total_val

    # Save the model if validation accuracy improves
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(teacher_model.state_dict(), 'teacher_model_best.pth')
        print(f"Best model saved with accuracy: {best_val_accuracy:.2f}%")

    # Step the learning rate scheduler (once per epoch, after the optimizer updates)
    scheduler.step()

    print(f"Epoch [{epoch + 1}/{num_epochs_teacher}], "
          f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
          f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

end_time_teacher = time.time()
print(f"Teacher training completed in {(end_time_teacher - start_time_teacher) / 60:.2f} minutes")


Teacher Model Training Epoch [1/20]: 100%|██████████| 313/313 [00:46<00:00,  6.73it/s]


Best model saved with accuracy: 66.10%
Epoch [1/20], Train Loss: 1.0811, Train Accuracy: 63.39%, Val Loss: 0.9772, Val Accuracy: 66.10%


Teacher Model Training Epoch [2/20]: 100%|██████████| 313/313 [00:45<00:00,  6.93it/s]


Best model saved with accuracy: 69.51%
Epoch [2/20], Train Loss: 0.9342, Train Accuracy: 67.85%, Val Loss: 0.8733, Val Accuracy: 69.51%


Teacher Model Training Epoch [3/20]: 100%|██████████| 313/313 [00:45<00:00,  6.82it/s]


Best model saved with accuracy: 71.51%
Epoch [3/20], Train Loss: 0.8355, Train Accuracy: 71.02%, Val Loss: 0.8043, Val Accuracy: 71.51%


Teacher Model Training Epoch [4/20]: 100%|██████████| 313/313 [00:45<00:00,  6.84it/s]


Best model saved with accuracy: 72.46%
Epoch [4/20], Train Loss: 0.8151, Train Accuracy: 71.77%, Val Loss: 0.7887, Val Accuracy: 72.46%


Teacher Model Training Epoch [5/20]: 100%|██████████| 313/313 [00:45<00:00,  6.84it/s]


Epoch [5/20], Train Loss: 0.7626, Train Accuracy: 73.25%, Val Loss: 0.7943, Val Accuracy: 72.17%


Teacher Model Training Epoch [6/20]: 100%|██████████| 313/313 [00:46<00:00,  6.79it/s]


Best model saved with accuracy: 73.22%
Epoch [6/20], Train Loss: 0.7195, Train Accuracy: 74.95%, Val Loss: 0.7598, Val Accuracy: 73.22%


Teacher Model Training Epoch [7/20]: 100%|██████████| 313/313 [00:46<00:00,  6.79it/s]


Best model saved with accuracy: 74.63%
Epoch [7/20], Train Loss: 0.6957, Train Accuracy: 75.77%, Val Loss: 0.7200, Val Accuracy: 74.63%


Teacher Model Training Epoch [8/20]: 100%|██████████| 313/313 [00:44<00:00,  6.97it/s]


Epoch [8/20], Train Loss: 0.6935, Train Accuracy: 75.73%, Val Loss: 0.7288, Val Accuracy: 74.57%


Teacher Model Training Epoch [9/20]: 100%|██████████| 313/313 [00:45<00:00,  6.94it/s]


Best model saved with accuracy: 75.51%
Epoch [9/20], Train Loss: 0.6826, Train Accuracy: 76.08%, Val Loss: 0.6971, Val Accuracy: 75.51%


Teacher Model Training Epoch [10/20]: 100%|██████████| 313/313 [00:44<00:00,  7.09it/s]


Best model saved with accuracy: 78.11%
Epoch [10/20], Train Loss: 0.5898, Train Accuracy: 79.26%, Val Loss: 0.6272, Val Accuracy: 78.11%


Teacher Model Training Epoch [11/20]: 100%|██████████| 313/313 [00:44<00:00,  7.02it/s]


Best model saved with accuracy: 79.13%
Epoch [11/20], Train Loss: 0.5581, Train Accuracy: 80.50%, Val Loss: 0.6097, Val Accuracy: 79.13%


Teacher Model Training Epoch [12/20]: 100%|██████████| 313/313 [00:43<00:00,  7.17it/s]


Epoch [12/20], Train Loss: 0.5484, Train Accuracy: 80.58%, Val Loss: 0.6059, Val Accuracy: 78.57%


Teacher Model Training Epoch [13/20]: 100%|██████████| 313/313 [00:43<00:00,  7.14it/s]


Epoch [13/20], Train Loss: 0.5315, Train Accuracy: 81.33%, Val Loss: 0.6007, Val Accuracy: 78.97%


Teacher Model Training Epoch [14/20]: 100%|██████████| 313/313 [00:44<00:00,  7.03it/s]


Best model saved with accuracy: 79.26%
Epoch [14/20], Train Loss: 0.5252, Train Accuracy: 81.55%, Val Loss: 0.5927, Val Accuracy: 79.26%


Teacher Model Training Epoch [15/20]: 100%|██████████| 313/313 [00:43<00:00,  7.28it/s]


Best model saved with accuracy: 79.80%
Epoch [15/20], Train Loss: 0.5177, Train Accuracy: 81.67%, Val Loss: 0.5882, Val Accuracy: 79.80%


Teacher Model Training Epoch [16/20]: 100%|██████████| 313/313 [00:41<00:00,  7.45it/s]


Best model saved with accuracy: 79.91%
Epoch [16/20], Train Loss: 0.5035, Train Accuracy: 82.29%, Val Loss: 0.5759, Val Accuracy: 79.91%


Teacher Model Training Epoch [17/20]: 100%|██████████| 313/313 [00:42<00:00,  7.45it/s]


Epoch [17/20], Train Loss: 0.4988, Train Accuracy: 82.56%, Val Loss: 0.5850, Val Accuracy: 79.60%


Teacher Model Training Epoch [18/20]: 100%|██████████| 313/313 [00:41<00:00,  7.47it/s]


Best model saved with accuracy: 80.07%
Epoch [18/20], Train Loss: 0.4939, Train Accuracy: 82.61%, Val Loss: 0.5860, Val Accuracy: 80.07%


Teacher Model Training Epoch [19/20]: 100%|██████████| 313/313 [00:42<00:00,  7.44it/s]


Epoch [19/20], Train Loss: 0.4862, Train Accuracy: 82.76%, Val Loss: 0.5807, Val Accuracy: 80.07%


Teacher Model Training Epoch [20/20]: 100%|██████████| 313/313 [00:41<00:00,  7.49it/s]


Epoch [20/20], Train Loss: 0.4792, Train Accuracy: 83.10%, Val Loss: 0.5796, Val Accuracy: 79.89%
Teacher training completed in 17.92 minutes


In [None]:
# Evaluate the checkpoint that achieved the best validation accuracy rather than
# whatever weights the last epoch left in memory. The training loop writes that
# checkpoint to 'teacher_model_best.pth'. If the file is missing (training cell
# not run), fall back to the in-memory weights and say so.
import os

best_checkpoint_path = 'teacher_model_best.pth'
if os.path.exists(best_checkpoint_path):
    teacher_model.load_state_dict(torch.load(best_checkpoint_path, map_location=device))
else:
    print(f"{best_checkpoint_path} not found; evaluating the in-memory weights instead")

# Testing phase
teacher_model.eval()
correct_test = 0
total_test = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = teacher_model(inputs)
        # Predicted class = index of the largest logit in each row
        predicted = outputs.argmax(dim=1)
        total_test += labels.size(0)
        correct_test += (predicted == labels).sum().item()

test_accuracy = 100 * correct_test / total_test
print(f'Accuracy of the teacher model on the test set: {test_accuracy:.2f}%')


Accuracy of the teacher model on the test set: 83.66%
