In [3]:
import argparse
import os, sys
import shutil
import time

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models

import random
import numpy as np
from collections import OrderedDict


In [4]:
dataset = 'cifar100'
depth = 50
batch_size = 64
test_batch_size = 256
epochs = 50
lr = 0.001
log_interval = 100
save = '/kaggle/working/'
arch = 'resnet'
seed = 1
momentum = 0.9
weight_decay = 1e-4

In [5]:
import errno
import math
import os
import sys
import time

import torch
import torch.nn as nn
import torch.nn.init as init
from torch.autograd import Variable


__all__ = ['get_mean_and_std', 'init_params', 'mkdir_p', 'AverageMeter']

def get_mean_and_std(dataset):
    '''Compute the mean and std value of dataset.'''
    dataloader = trainloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)

    mean = torch.zeros(3)
    std = torch.zeros(3)
    print('==> Computing mean and std..')
    for inputs, targets in dataloader:
        for i in range(3):
            mean[i] += inputs[:,i,:,:].mean()
            std[i] += inputs[:,i,:,:].std()
    mean.div_(len(dataset))
    std.div_(len(dataset))
    return mean, std

def get_conv_zero_param(model):
    total = 0
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            total += torch.sum(m.weight.data.eq(0))
    return total

def init_params(net):
    '''Init layer parameters.'''
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal(m.weight, mode='fan_out')
            if m.bias:
                init.constant(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            init.constant(m.weight, 1)
            init.constant(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal(m.weight, std=1e-3)
            if m.bias:
                init.constant(m.bias, 0)

def mkdir_p(path):
    '''make dir if not exist'''
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise

class AverageMeter(object):
    """Computes and stores the average and current value
       Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
    """
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [6]:
batch_size = 64
train_loader = torch.utils.data.DataLoader(
        datasets.CIFAR100('./data.cifar100', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.Pad(4),
                           transforms.RandomCrop(32),
                           transforms.RandomHorizontalFlip(),
                           transforms.ToTensor(),
                           transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
                       ])),
        batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR100('./data.cifar100', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
                       ])),
        batch_size=batch_size, shuffle=True)

Files already downloaded and verified


In [7]:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import partial
from torch.autograd import Variable


__all__ = ['resnet']

def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, cfg, stride=1, downsample=None):
        # cfg should be a number in this case
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, cfg, stride)
        self.bn1 = nn.BatchNorm2d(cfg)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(cfg, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

def downsample_basic_block(x, planes):
    x = nn.AvgPool2d(2,2)(x)
    zero_pads = torch.Tensor(
        x.size(0), planes - x.size(1), x.size(2), x.size(3)).zero_()
    if isinstance(x.data, torch.cuda.FloatTensor):
        zero_pads = zero_pads.cuda()

    out = Variable(torch.cat([x.data, zero_pads], dim=1))

    return out

class ResNet(nn.Module):

    def __init__(self, depth, dataset='cifar10', cfg=None):
        super(ResNet, self).__init__()
        # Model type specifies number of layers for CIFAR-10 model
        assert (depth - 2) % 6 == 0, 'depth should be 6n+2'
        n = (depth - 2) // 6

        block = BasicBlock
        if cfg == None:
            cfg = [[16]*n, [32]*n, [64]*n]
            cfg = [item for sub_list in cfg for item in sub_list]

        self.cfg = cfg

        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 16, n, cfg=cfg[0:n])
        self.layer2 = self._make_layer(block, 32, n, cfg=cfg[n:2*n], stride=2)
        self.layer3 = self._make_layer(block, 64, n, cfg=cfg[2*n:3*n], stride=2)
        self.avgpool = nn.AvgPool2d(8)
        if dataset == 'cifar10':
            num_classes = 10
        elif dataset == 'cifar100':
            num_classes = 100
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, cfg, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = partial(downsample_basic_block, planes=planes*block.expansion)

        layers = []
        layers.append(block(self.inplanes, planes, cfg[0], stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, cfg[i]))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)    # 32x32

        x = self.layer1(x)  # 32x32
        x = self.layer2(x)  # 16x16
        x = self.layer3(x)  # 8x8

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

def resnet(**kwargs):
    """
    Constructs a ResNet model.
    """
    return ResNet(**kwargs)

if __name__ == '__main__':
    net = resnet(depth=50)

In [8]:
model = resnet(dataset=dataset, depth=depth)

In [14]:
device = 'cuda'

In [15]:
model.load_state_dict(torch.load('/kaggle/input/base_resnet50/pytorch/default/1/resnet_model.pth', weights_only = True))
model.to(device)

ResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=Fals

In [16]:
from torch.profiler import profile, ProfilerActivity

def test():
    model.eval()
    test_loss = 0
    correct = 0

    # Start profiling
    with profile(
        activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
        record_shapes=True,
        profile_memory=True,
        with_stack=True,
    ) as prof:  # Profiler object initialized here
        with torch.no_grad():  # Use torch.no_grad() for inference
            for data, target in test_loader:
                data, target = data.cuda(), target.cuda()
                output = model(data)
                test_loss += F.cross_entropy(output, target, reduction='sum').item()  # sum batch loss
                pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max log-probability
                correct += pred.eq(target.view_as(pred)).sum().item()

    prof.export_chrome_trace("trace.json")

    # Calculate test loss and accuracy
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.1f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    return correct / len(test_loader.dataset)

In [17]:
import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
criterion = nn.CrossEntropyLoss()

In [19]:
import torch.nn.utils.prune as prune
import torch.quantization as quant
from torch.profiler import profile, record_function, ProfilerActivity
import time

def profile_model(model, data_loader, num_batches=10):
    model.eval()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    with profile(
        activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
        on_trace_ready=torch.profiler.tensorboard_trace_handler('./logs'),
        record_shapes=True,
        with_stack=True
    ) as prof:
        for batch_idx, (inputs, targets) in enumerate(data_loader):
            if batch_idx >= num_batches:
                break
            inputs, targets = inputs.to(device), targets.to(device)

            start_time = time.time()
            with record_function("model_inference"):
                outputs = model(inputs)
            latency = time.time() - start_time

    print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))
    print(f"\nLatency per batch: {latency:.4f} seconds")
    print(f"Throughput: {len(inputs) / latency:.4f} samples/second")
    print(f"Peak GPU memory usage: {torch.cuda.max_memory_allocated(device) / (1024 * 1024):.2f} MB")
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Model parameter count: {total_params}")
    print(f"FLOPs estimation: {total_params * 2:.2e} FLOPs")



In [20]:
profile_model(model,test_loader)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference         0.00%       0.000us         0.00%       0.000us       0.000us     197.622ms        61.39%     197.622ms      19.762ms            10  
                                        model_inference        11.32%      58.563ms        39.65%     205.148ms      20.515ms       0.000us         0.00%     123.634ms      12.363ms            10  
         

# Pruning

In [21]:
total = 0
percent = 0.6
for m in model.modules():
    if isinstance(m, nn.Conv2d):
        total += m.weight.data.numel()
conv_weights = torch.zeros(total)
index = 0
for m in model.modules():
    if isinstance(m, nn.Conv2d):
        size = m.weight.data.numel()
        conv_weights[index:(index+size)] = m.weight.data.view(-1).abs().clone()
        index += size

y, i = torch.sort(conv_weights)
thre_index = int(total * percent)
thre = y[thre_index]
pruned = 0
print('Pruning threshold: {}'.format(thre))
zero_flag = False
for k, m in enumerate(model.modules()):
    if isinstance(m, nn.Conv2d):
        weight_copy = m.weight.data.abs().clone()
        mask = weight_copy.gt(thre).float().cuda()
        pruned = pruned + mask.numel() - torch.sum(mask)
        m.weight.data.mul_(mask)
        if int(torch.sum(mask)) == 0:
            zero_flag = True
        print('layer index: {:d} \t total params: {:d} \t remaining params: {:d}'.
            format(k, mask.numel(), int(torch.sum(mask))))
print('Total conv params: {}, Pruned conv params: {}, Pruned ratio: {}'.format(total, pruned, pruned/total))

Pruning threshold: 0.054346729069948196
layer index: 1 	 total params: 432 	 remaining params: 274
layer index: 6 	 total params: 2304 	 remaining params: 1526
layer index: 9 	 total params: 2304 	 remaining params: 1465
layer index: 12 	 total params: 2304 	 remaining params: 1530
layer index: 15 	 total params: 2304 	 remaining params: 1503
layer index: 18 	 total params: 2304 	 remaining params: 1488
layer index: 21 	 total params: 2304 	 remaining params: 1508
layer index: 24 	 total params: 2304 	 remaining params: 1474
layer index: 27 	 total params: 2304 	 remaining params: 1479
layer index: 30 	 total params: 2304 	 remaining params: 1468
layer index: 33 	 total params: 2304 	 remaining params: 1494
layer index: 36 	 total params: 2304 	 remaining params: 1479
layer index: 39 	 total params: 2304 	 remaining params: 1501
layer index: 42 	 total params: 2304 	 remaining params: 1467
layer index: 45 	 total params: 2304 	 remaining params: 1486
layer index: 48 	 total params: 230

In [22]:
print('Accuracy after pruning')
acc = test()

Accuracy after pruning

Test set: Average loss: 563.7897, Accuracy: 100/10000 (1.0%)



In [23]:
def save_checkpoint(state, is_best, filepath):
    torch.save(state, os.path.join(filepath, 'checkpoint.pth.tar'))
    if is_best:
        shutil.copyfile(os.path.join(filepath, 'checkpoint.pth.tar'), os.path.join(filepath, 'model_best.pth.tar'))

In [24]:
save_checkpoint({
        'epoch': 0,
        'state_dict': model.state_dict(),
        'best_prec1': 0,
        'optimizer': optimizer.state_dict(),
        'cfg': model.cfg
    }, is_best=0, filepath=save)

# Fine tune pruned model

In [25]:
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
criterion = nn.CrossEntropyLoss()

In [33]:
def train(epoch):
    model.train()
    avg_loss = 0.
    train_acc = 0.
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        
        optimizer.zero_grad()
        
        # Forward pass: Compute model output
        output = model(data)
        
        # Compute loss
        loss = F.cross_entropy(output, target)
        avg_loss = avg_loss + loss.item()
        
        # Use torch.no_grad() for accuracy computation (no gradients required here)
        with torch.no_grad():
            pred = output.data.max(1, keepdim=True)[1]  # Get index of max log-probability
            train_acc += pred.eq(target.data.view_as(pred)).cpu().sum()

        # Backpropagate the loss and optimize the model
        loss.backward()
        optimizer.step()
        
        # Log progress
        if batch_idx % log_interval == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
                  f'({100. * batch_idx / len(train_loader):.1f}%)]\tLoss: {loss.item():.6f}')


In [35]:
best_prec1 = 0.
epochs = 50
model.train()
model.to(device)
for epoch in range(0, epochs):
    train(epoch)
    prec1 = test()
    is_best = prec1 > best_prec1
    best_prec1 = max(prec1, best_prec1)
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_prec1': best_prec1,
        'optimizer': optimizer.state_dict(),
        'cfg': model.cfg
    }, is_best, filepath=save)
    


Test set: Average loss: 3.9906, Accuracy: 855/10000 (8.6%)


Test set: Average loss: 3.8529, Accuracy: 1044/10000 (10.4%)


Test set: Average loss: 3.7762, Accuracy: 1210/10000 (12.1%)


Test set: Average loss: 3.6693, Accuracy: 1315/10000 (13.2%)


Test set: Average loss: 3.5733, Accuracy: 1487/10000 (14.9%)


Test set: Average loss: 3.4863, Accuracy: 1661/10000 (16.6%)


Test set: Average loss: 3.3734, Accuracy: 1858/10000 (18.6%)


Test set: Average loss: 3.3192, Accuracy: 1966/10000 (19.7%)


Test set: Average loss: 3.2883, Accuracy: 1979/10000 (19.8%)


Test set: Average loss: 3.1777, Accuracy: 2225/10000 (22.2%)


Test set: Average loss: 3.1573, Accuracy: 2269/10000 (22.7%)


Test set: Average loss: 3.1200, Accuracy: 2316/10000 (23.2%)


Test set: Average loss: 3.0561, Accuracy: 2403/10000 (24.0%)


Test set: Average loss: 2.9425, Accuracy: 2683/10000 (26.8%)


Test set: Average loss: 2.8798, Accuracy: 2755/10000 (27.6%)


Test set: Average loss: 2.8508, Accuracy: 2845/10000 (28

In [36]:
print('Accuracy after Finetuning pruned Model')
acc = test()

Accuracy after Finetuning pruned Model

Test set: Average loss: 1.9913, Accuracy: 4719/10000 (47.2%)



In [37]:
profile_model(model, test_loader)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference         0.00%       0.000us         0.00%       0.000us       0.000us     169.943ms        57.74%     169.943ms      16.994ms            10  
                                        model_inference         9.87%      47.752ms        36.04%     174.336ms      17.434ms       0.000us         0.00%     123.699ms      12.370ms            10  
         