# Selecting device

In [1]:
import torch 
import torch.nn as nn

device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# General function to test a model

In [2]:
import numpy as np

def test_model(model):
    model.eval()
    starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
    timings = []
    #GPU-WARM-UP
    i=0
    for data in test_loader:
        if(i>1000):
            break
        images, labels = data
        images = images.to(device)
        _ = model(images)
        i += 1
    
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            
            starter.record()
            outputs = model(images)
            ender.record()
            
            # WAIT FOR GPU SYNC
            torch.cuda.synchronize()
            curr_time = starter.elapsed_time(ender)
            timings.append(curr_time)
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the test images: '+str(100 * correct / total))
    
    tot = np.sum(timings)
    mean_syn_per_batch = np.sum(timings) / len(timings)
    std_syn_per_batch = np.std(timings)
    print("Total inference time for test data: "+str(tot))
    print("Mean inference time per test batch: "+str(mean_syn_per_batch))
    print("Standard deviation of inference times per test batch: "+str(std_syn_per_batch))
    model.train()

# Loading and normalizing images using TorchVision


In [3]:
import numpy as np
import os
import shutil

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import os, sys, shutil, time, random
from scipy.spatial import distance

In [4]:
train_data = datasets.Flowers102('./', split = "train", download=True,
                             transform=transforms.Compose([
                                 transforms.Pad(4),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
                             ]))
        
val_data = datasets.Flowers102('./', split = "val", download=True,
                             transform=transforms.Compose([
                                 transforms.Pad(4),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
                             ]))
        
combined_train_data = torch.utils.data.ConcatDataset([train_data, val_data])
        
min_height = combined_train_data[0][0].shape[1]
min_width = combined_train_data[0][0].shape[2]

max_height = combined_train_data[0][0].shape[1]
max_width = combined_train_data[0][0].shape[2]

for i in range(len(combined_train_data)):
    if combined_train_data[i][0].shape[1] < min_height :
        min_height = combined_train_data[i][0].shape[1]
    if combined_train_data[i][0].shape[2] < min_width:
        min_width = combined_train_data[i][0].shape[2]
                
    if combined_train_data[i][0].shape[1] > max_height:
        max_height = combined_train_data[i][0].shape[1]
    if combined_train_data[i][0].shape[2] > max_width:
        max_width = combined_train_data[i][0].shape[2]
        
new_size = ((min_height+max_height)//2, (min_width+max_width)//2)
        
train_data = datasets.Flowers102('./', split = "train", download=True,
                             transform=transforms.Compose([
                                 transforms.Pad(4),
                                 transforms.Resize(new_size),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
                             ]))
        
val_data = datasets.Flowers102('./', split = "val", download=True,
                             transform=transforms.Compose([
                                 transforms.Pad(4),
                                 transforms.Resize(new_size),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
                             ]))
        
combined_train_data = torch.utils.data.ConcatDataset([train_data, val_data])
        
train_loader = torch.utils.data.DataLoader(
            combined_train_data,
            batch_size=16, shuffle=True)
        
test_loader = torch.utils.data.DataLoader(
            datasets.Flowers102('./', split = "test", download=True,
                             transform=transforms.Compose([
                                 transforms.Pad(4),
                                 transforms.Resize(new_size),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
                             ])),
            batch_size=16, shuffle=False)

Downloading https://thor.robots.ox.ac.uk/datasets/flowers-102/102flowers.tgz to flowers-102/102flowers.tgz


100%|██████████| 344862509/344862509 [00:11<00:00, 31348989.37it/s]


Extracting flowers-102/102flowers.tgz to flowers-102
Downloading https://thor.robots.ox.ac.uk/datasets/flowers-102/imagelabels.mat to flowers-102/imagelabels.mat


100%|██████████| 502/502 [00:00<00:00, 294275.42it/s]


Downloading https://thor.robots.ox.ac.uk/datasets/flowers-102/setid.mat to flowers-102/setid.mat


100%|██████████| 14989/14989 [00:00<00:00, 6137696.25it/s]


In [5]:
import math

import torch
import torch.nn as nn
from torch.autograd import Variable

class alexnet(nn.Module):
    def __init__(self, dataset='flowers102', init_weights=False):
        super(alexnet, self).__init__()


        self.imgnet_an = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)
        self.an_features = getattr(self.imgnet_an, 'features')
        self.an_avgpool = getattr(self.imgnet_an, 'avgpool')
        self.an_classifier = getattr(self.imgnet_an, 'classifier')

        if dataset == 'cifar10':
            num_classes = 10
        elif dataset == 'cifar100':
            num_classes = 100
        elif dataset == 'flowers102':
            num_classes = 102
            
        self.classifier = nn.Sequential(
              nn.Linear(1000, 512),
              nn.BatchNorm1d(512),
              nn.ReLU(inplace=True),
              nn.Linear(512, num_classes)
            )
        
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        
        x = self.an_features(x)
        x = self.an_avgpool(x)
        x = torch.flatten(x, 1)
        x = self.an_classifier(x)
        
        y = self.classifier(x)
        
        return y

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(0.5)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


In [6]:
arch = "alexnet"

# The unpruned model

In [7]:
import sys
sys.path.append("./alexnet-fpgm/testing")
import models

unpruned_model = models.__dict__[arch](dataset='flowers102')
unpruned_model.to(device)

total = 0
print('\nTrainable parameters:')

for n, module in unpruned_model.named_modules():
    for name, param in module.named_parameters():
        if param.requires_grad:
            print(n+"."+name, '\t', param.numel())
            total += param.numel()
print()
print('Total', '\t', total)

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:00<00:00, 258MB/s]



Trainable parameters:
.an_features.0.weight 	 23232
.an_features.0.bias 	 64
.an_features.3.weight 	 307200
.an_features.3.bias 	 192
.an_features.6.weight 	 663552
.an_features.6.bias 	 384
.an_features.8.weight 	 884736
.an_features.8.bias 	 256
.an_features.10.weight 	 589824
.an_features.10.bias 	 256
.an_classifier.1.weight 	 37748736
.an_classifier.1.bias 	 4096
.an_classifier.4.weight 	 16777216
.an_classifier.4.bias 	 4096
.an_classifier.6.weight 	 4096000
.an_classifier.6.bias 	 1000
.classifier.0.weight 	 512000
.classifier.0.bias 	 512
.classifier.1.weight 	 512
.classifier.1.bias 	 512
.classifier.3.weight 	 52224
.classifier.3.bias 	 102
an_features.0.weight 	 23232
an_features.0.bias 	 64
an_features.3.weight 	 307200
an_features.3.bias 	 192
an_features.6.weight 	 663552
an_features.6.bias 	 384
an_features.8.weight 	 884736
an_features.8.bias 	 256
an_features.10.weight 	 589824
an_features.10.bias 	 256
an_features.0.weight 	 23232
an_features.0.bias 	 64
an_features.

In [8]:
from torch.autograd import Variable

# def train(train_loader, model, optimizer, epoch, m=0):
#     model.train()
#     avg_loss = 0. 
#     train_acc = 0.
#     for batch_idx, (data, target) in enumerate(train_loader):
#         if torch.cuda.is_available():
#             data, target = data.cuda(), target.cuda()
#         data, target = Variable(data), Variable(target)
#         optimizer.zero_grad()
#         output = model(data)
#         loss = F.cross_entropy(output, target)
#         avg_loss += loss.item()
#         pred = output.data.max(1, keepdim=True)[1]
#         train_acc += pred.eq(target.data.view_as(pred)).cpu().sum()
#         loss.backward()
#         optimizer.step()
#         if batch_idx % 10 == 0:
#             print('Train Epoch: {} [{}/{} ({:.1f}%)]\tLoss: {:.6f}'.format(
#                 epoch, batch_idx * len(data), len(train_loader.dataset),
#                        100. * batch_idx / len(train_loader), loss.item()))

In [9]:
import torch.nn.functional as F
import torch.optim as optim

# optimizer = optim.SGD(unpruned_model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)

# best_prec1 = "NULL"
# for epoch in range(0, 160):
#     if epoch in [160 * 0.5, 160 * 0.75]:
#         for param_group in optimizer.param_groups:
#             param_group['lr'] *= 0.1
#     train(train_loader, unpruned_model, optimizer, epoch)

In [10]:
# torch.save(unpruned_model, './alexnet_fl102_unpruned_net.pth') # without .state_dict

# Testing the accuracy of the unpruned model

In [11]:
# test_model(unpruned_model)

# Pruning using FPGM

In [12]:
cfg = [64, 192, 192, 180, 150]
tot_conv_filters = [64, 192, 384, 256, 256]

In [13]:
! python3  ./alexnet-fpgm/testing/pruning_fl102_alexnet.py . --batch-size 16 --test-batch-size 16 --dataset flowers102 --arch alexnet --save_path ./logs/alexnet_pretrain/prune_precfg_epoch160 --rate_norm 1 --rate_dist 0.5 --cfg 64,192,192,180,150 --use_state_dict --lr 0.001 --epochs 135 --epoch_prune 5 --use_precfg

save path : ./logs/alexnet_pretrain/prune_precfg_epoch160
{'data_path': '.', 'dataset': 'flowers102', 'batch_size': 16, 'test_batch_size': 16, 'epochs': 135, 'start_epoch': 0, 'lr': 0.001, 'momentum': 0.9, 'weight_decay': 0.0001, 'resume': '', 'no_cuda': False, 'seed': 1, 'log_interval': 10, 'save_path': './logs/alexnet_pretrain/prune_precfg_epoch160', 'arch': 'alexnet', 'rate_norm': 1.0, 'rate_dist': 0.5, 'cfg': '64,192,192,180,150', 'layer_begin': 1, 'layer_end': 1, 'layer_inter': 1, 'epoch_prune': 5, 'dist_type': 'l2', 'use_state_dict': True, 'pretrain_path': '', 'use_precfg': True, 'evaluate': False, 'cuda': True}
Random Seed: 1
python version : 3.10.10 | packaged by conda-forge | (main, Mar 24 2023, 20:08:06) [GCC 11.3.0]
torch  version : 2.0.0
cudnn  version : 8700
Norm Pruning Rate: 1.0
Distance Pruning Rate: 0.5
Layer Begin: 1
Layer End: 1
Layer Inter: 1
Epoch prune: 5
Pretrain path: 
Dist type: l2
Pre cfg: True
=> creating model 'alexnet'
Using cache found in /

# Loading the pruned (only zeroed out) model

In [14]:
import sys
sys.path.append("./alexnet-fpgm/testing")
import models

pruned_model = models.__dict__[arch](dataset='flowers102')
pruned_model.to(device)

filepath = './logs/alexnet_pretrain/prune_precfg_epoch160'
filename = os.path.join(filepath, 'checkpoint.pth.tar')
pruned_model.load_state_dict(torch.load(filename)['state_dict'])

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


<All keys matched successfully>

# Saving the pruned (only zeroed out) model

In [15]:
torch.save(pruned_model, './alexnet_fl102_pruned_net.pth') # without .state_dict

# Let's test the accuracy of the pruned (only zeroed out) model

In [16]:
test_model(pruned_model)

Accuracy of the network on the test images: 84.12750040657018
Total inference time for test data: 18513.639444351196
Mean inference time per test batch: 48.08737518013298
Standard deviation of inference times per test batch: 1.1002719128541492


# Changing the architecture

In [17]:
!pip install torch-pruning
import torch_pruning as tp
    
for name, module in pruned_model.named_modules():
    if isinstance(module, torch.nn.Conv2d): #Iterating over all the conv2d layers of the model
        channel_indices = [] #Stores indices of the channels to prune within this conv layer
        t = module.weight.clone().detach()
        t = t.reshape(t.shape[0], -1)
        z = torch.all(t == 0, dim=1)
        z = z.tolist()
        
        for i, flag in enumerate(z):
            if(flag):
                channel_indices.append(i)

        if(channel_indices == []):
            continue
        
        # 1. build dependency graph for vgg
        shp = combined_train_data[0][0].shape
        DG = tp.DependencyGraph().build_dependency(pruned_model, example_inputs=torch.randn(1,shp[0],shp[1],shp[2]).to(device))

        # 2. Specify the to-be-pruned channels. Here we prune those channels indexed by idxs.
        group = DG.get_pruning_group(module, tp.prune_conv_out_channels, idxs=channel_indices)
        #print(group)

        # 3. prune all grouped layers that are coupled with the conv layer (included).
        if DG.check_pruning_group(group): # avoid full pruning, i.e., channels=0.
            group.prune()
    
# 4. Save & Load
pruned_model.zero_grad() # We don't want to store gradient information
torch.save(pruned_model, './alexnet_fl102_arch_pruned_net.pth') # without .state_dict

Collecting torch-pruning
  Downloading torch_pruning-1.1.9-py3-none-any.whl (39 kB)
Installing collected packages: torch-pruning
Successfully installed torch-pruning-1.1.9
[0m

# Let's test the accuracy of the pruned model after the architecture modifications

In [18]:
test_model(pruned_model)

Accuracy of the network on the test images: 78.54935761912506
Total inference time for test data: 15247.58581161499
Mean inference time per test batch: 39.60411899120777
Standard deviation of inference times per test batch: 0.9577535976504113


# Arch pruned model reload check

In [19]:
reloaded_model = torch.load('./alexnet_fl102_arch_pruned_net.pth')

In [20]:
test_model(reloaded_model)

Accuracy of the network on the test images: 78.54935761912506
Total inference time for test data: 15244.30097579956
Mean inference time per test batch: 39.59558695012873
Standard deviation of inference times per test batch: 0.9561066116179376
