In [1]:
import torch
from torch import nn
import torch.nn.utils.prune as prune
import torch.nn.functional as F

## Create a model

In this tutorial, we use the [LeNet](http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf) architecture from 
LeCun et al., 1998.



In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square conv kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5x5 image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, int(x.nelement() / x.shape[0]))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = LeNet().to(device=device)

In [3]:
module = model.conv1
print(module.weight.shape) # 'weight' is a parameter of the module
print(list(module.named_parameters())) # 'weight' and 'bias'

torch.Size([6, 1, 5, 5])
[('weight', Parameter containing:
tensor([[[[-1.3387e-01, -2.6966e-02, -4.0916e-02,  1.1125e-01, -7.2837e-02],
          [-8.0426e-02, -8.1753e-02,  5.5729e-02, -1.0503e-01, -1.8089e-01],
          [-7.1937e-02,  4.1310e-03,  7.1424e-03,  9.8715e-02, -8.3692e-02],
          [ 1.0994e-01, -5.6701e-04,  1.0802e-01,  1.4941e-01,  2.6358e-02],
          [ 2.6095e-02, -1.8742e-01,  1.4019e-01,  1.0085e-01, -1.8469e-01]]],


        [[[-1.6272e-01,  1.8860e-01,  1.5586e-01, -1.5765e-01,  1.4668e-01],
          [ 5.4128e-02, -9.1188e-02, -2.6759e-02, -6.6432e-02,  4.7913e-02],
          [ 1.3219e-01, -3.8145e-02,  1.1763e-01, -1.2220e-01, -3.1650e-02],
          [ 1.3705e-01,  1.7972e-01, -6.2877e-02,  1.4389e-01, -1.3510e-01],
          [ 6.8367e-02,  1.7308e-01,  3.2380e-02, -4.5484e-02,  1.7421e-02]]],


        [[[ 2.8648e-02, -1.9839e-01, -1.3311e-01,  1.4248e-01, -1.4888e-01],
          [ 1.5500e-01, -5.6221e-02, -1.6254e-02,  2.4293e-02, -1.2622e-01],
         

In [4]:
# 아직 prune을 하지 않았기 때문에, pruning mask가 없음.
print(list(module.named_buffers())) # 'weight_mask' and 'bias_mask'

[]


In [5]:
# L1 Norm 기준으로 50% pruning하여, [6, 1, 5, 5]shape tensor를 [3, 1, 5, 5] shape tensor로 만듦.
prune.LnStructured.apply(module, name="weight", amount=0.5, n=1, dim=0)
print(module.weight) # 6, 1, 5, 5

# L1 norm이 0인 filter의 index를 반환
pruning_filter_idx_list = []
saving_filter_idx_list = []
for i, filter in enumerate(module.weight):
    if torch.sum(filter) == 0:
        pruning_filter_idx_list.append(i)
    else : 
        saving_filter_idx_list.append(i)
        
print(pruning_filter_idx_list)
print(saving_filter_idx_list)
pruning_filter_idx_list = torch.tensor(pruning_filter_idx_list).to(device=device)
saving_filter_idx_list = torch.tensor(saving_filter_idx_list).to(device=device)

tensor([[[[-0.0000, -0.0000, -0.0000,  0.0000, -0.0000],
          [-0.0000, -0.0000,  0.0000, -0.0000, -0.0000],
          [-0.0000,  0.0000,  0.0000,  0.0000, -0.0000],
          [ 0.0000, -0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000, -0.0000,  0.0000,  0.0000, -0.0000]]],


        [[[-0.1627,  0.1886,  0.1559, -0.1577,  0.1467],
          [ 0.0541, -0.0912, -0.0268, -0.0664,  0.0479],
          [ 0.1322, -0.0381,  0.1176, -0.1222, -0.0317],
          [ 0.1370,  0.1797, -0.0629,  0.1439, -0.1351],
          [ 0.0684,  0.1731,  0.0324, -0.0455,  0.0174]]],


        [[[ 0.0286, -0.1984, -0.1331,  0.1425, -0.1489],
          [ 0.1550, -0.0562, -0.0163,  0.0243, -0.1262],
          [-0.0882,  0.1873, -0.0074,  0.0528,  0.1910],
          [ 0.0728, -0.1912, -0.1587, -0.1206,  0.0358],
          [ 0.1837,  0.1367,  0.0183,  0.0558,  0.1913]]],


        [[[-0.0000,  0.0000, -0.0000, -0.0000, -0.0000],
          [-0.0000, -0.0000,  0.0000, -0.0000,  0.0000],
          [ 0.0000,

In [6]:
# pruning_filter_index에 해당하는 filter를 제거하여 새로운 tensor 생성
print(module.weight.device)
print(torch.Tensor(pruning_filter_idx_list).device)

saveing_filter_idx_list = []

pruned_filter = torch.index_select(module.weight, 0, saving_filter_idx_list)
print(pruned_filter)

cuda:0
cuda:0
tensor([[[[-0.1627,  0.1886,  0.1559, -0.1577,  0.1467],
          [ 0.0541, -0.0912, -0.0268, -0.0664,  0.0479],
          [ 0.1322, -0.0381,  0.1176, -0.1222, -0.0317],
          [ 0.1370,  0.1797, -0.0629,  0.1439, -0.1351],
          [ 0.0684,  0.1731,  0.0324, -0.0455,  0.0174]]],


        [[[ 0.0286, -0.1984, -0.1331,  0.1425, -0.1489],
          [ 0.1550, -0.0562, -0.0163,  0.0243, -0.1262],
          [-0.0882,  0.1873, -0.0074,  0.0528,  0.1910],
          [ 0.0728, -0.1912, -0.1587, -0.1206,  0.0358],
          [ 0.1837,  0.1367,  0.0183,  0.0558,  0.1913]]],


        [[[ 0.1235, -0.0855,  0.0328, -0.1080, -0.1285],
          [ 0.1280, -0.1653,  0.1890,  0.0902, -0.0385],
          [ 0.0724, -0.0905,  0.0305, -0.0274, -0.0871],
          [ 0.1415,  0.0167,  0.1468, -0.0960, -0.0765],
          [ 0.0157,  0.1165, -0.0418, -0.1827,  0.1084]]]], device='cuda:0',
       grad_fn=<IndexSelectBackward0>)


In [7]:
print(f"weight.shape before pruning: {module.weight.shape}")
print(f"weight.shape after pruning: {pruned_filter.shape}")

weight.shape before pruning: torch.Size([6, 1, 5, 5])
weight.shape after pruning: torch.Size([3, 1, 5, 5])


In [8]:
import sys
from architecture2 import VGG16_BN
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG16_BN()
checkpoint = torch.load('./vgg16_baseline_exp4/checkpoint/best_model.pth')
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)

In [9]:
layer=1
current_layer = (getattr(model, f'conv{layer}'))
# next_conv_layer = getattr(model, f'conv{layer+1}')
print(current_layer)
# print(next_conv_layer)

# print current conv layer's weight, bias, BN's weight, bias
conv_layer = current_layer[0]
bn_layer = current_layer[1]

print(conv_layer.weight.shape)
print(conv_layer.bias.shape)
print(bn_layer.weight.shape)
print(bn_layer.bias.shape)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
torch.Size([64, 3, 3, 3])
torch.Size([64])
torch.Size([64])
torch.Size([64])


In [10]:
with torch.no_grad():
    conv_layer.weight[0][0][0][0] = 0
print(model.conv1[0].weight)

Parameter containing:
tensor([[[[ 0.0000e+00,  3.5564e-02,  2.0281e-02],
          [ 4.5183e-02,  5.6363e-02,  3.9902e-02],
          [ 2.4126e-02,  2.9762e-02,  1.9486e-02]],

         [[-4.4796e-03, -2.7185e-03, -1.1659e-02],
          [ 2.2306e-03,  4.8387e-03, -2.5939e-03],
          [-1.6340e-02, -1.6839e-02, -1.8054e-02]],

         [[ 1.8749e-02,  1.6790e-02, -2.8113e-03],
          [ 9.8192e-03,  8.3820e-03, -4.5390e-03],
          [-2.4398e-02, -2.6830e-02, -2.9554e-02]]],


        [[[-7.4711e-02, -9.9932e-02, -1.3853e-01],
          [ 1.4360e-01, -4.6040e-01,  2.1396e-01],
          [ 1.6098e-01,  2.1157e-01,  8.5501e-02]],

         [[ 6.2394e-02,  1.0577e-01,  2.4002e-02],
          [ 1.2390e-01, -4.9478e-01,  2.4907e-01],
          [-1.6924e-02, -5.1053e-02, -8.4648e-02]],

         [[-3.0103e-02,  2.4248e-01, -2.9110e-02],
          [ 1.2702e-01, -2.8135e-01,  2.3965e-01],
          [-6.9359e-02, -6.5029e-02, -8.9060e-02]]],


        [[[ 1.6392e-02,  1.5533e-02,  9.9450

In [11]:
# current layer's weight, bias, BN 
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        print(name)
        print(module.weight.shape)
        print(module.bias.shape)
    elif isinstance(module, torch.nn.BatchNorm2d):
        print(name)
        print(module.weight.shape)        # gamma
        print(module.bias.shape)          # beta
        print (module.running_mean.shape) # test시에 사용되기 위한 exponentially mean
        print (module.running_var.shape)  # test시에 사용되기 위한 exponentially variance
        
    elif isinstance(module, torch.nn.Linear):
        print(name)
        print(module.weight.shape)
        print(module.bias.shape)

conv1.0
torch.Size([64, 3, 3, 3])
torch.Size([64])
conv1.1
torch.Size([64])
torch.Size([64])
torch.Size([64])
torch.Size([64])
conv2.0
torch.Size([64, 64, 3, 3])
torch.Size([64])
conv2.1
torch.Size([64])
torch.Size([64])
torch.Size([64])
torch.Size([64])
conv3.0
torch.Size([128, 64, 3, 3])
torch.Size([128])
conv3.1
torch.Size([128])
torch.Size([128])
torch.Size([128])
torch.Size([128])
conv4.0
torch.Size([128, 128, 3, 3])
torch.Size([128])
conv4.1
torch.Size([128])
torch.Size([128])
torch.Size([128])
torch.Size([128])
conv5.0
torch.Size([256, 128, 3, 3])
torch.Size([256])
conv5.1
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
conv6.0
torch.Size([256, 256, 3, 3])
torch.Size([256])
conv6.1
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
conv7.0
torch.Size([256, 256, 3, 3])
torch.Size([256])
conv7.1
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
conv8.0
torch.Size([512, 256, 3, 3])
torch.Size([512])
conv8.1
torch.Si

In [15]:
#  bn_layer.running_mean  vs bn_layer.running_mean.data
current_layer = getattr(model, f'conv{1}') 
conv_layer = current_layer[0]
bn_layer = current_layer[1]

bn_layer_mean = bn_layer.running_mean
bn_layer_mean_data = bn_layer.running_mean.data

print(bn_layer_mean)
print(bn_layer_mean_data)

tensor([ 1.9831e-03,  6.9053e-04,  1.2846e-03, -7.9381e-04,  6.3807e-03,
        -3.2694e-03,  3.5078e-03, -7.4481e-06, -2.7154e-03, -1.4607e-03,
        -2.1817e-03,  6.1711e-04,  1.1301e-05,  1.8088e-03,  1.4820e-03,
        -1.1569e-03,  1.4506e-03,  1.2601e-03, -5.2862e-06,  1.6401e-03,
        -3.2623e-04,  3.1609e-03, -6.7862e-06,  3.2926e-05,  2.3428e-04,
         4.8104e-07, -2.5539e-03,  2.0405e-05,  8.1100e-06, -3.5614e-03,
         4.0980e-03, -5.7324e-03, -2.0118e-03, -2.0995e-03,  3.1241e-03,
         2.0389e-05, -3.0720e-04, -1.1179e-03,  9.0925e-06,  1.5667e-04,
        -4.6267e-03,  2.0899e-03, -2.4335e-03, -1.0357e-02, -3.0892e-03,
         4.5506e-04, -1.2753e-03,  3.5382e-03,  2.2312e-03, -1.1330e-03,
        -3.9815e-04, -5.9765e-03, -2.9159e-03, -1.3330e-04,  1.6760e-04,
         2.4183e-03, -7.9425e-05,  8.7930e-05, -4.8635e-03, -2.4842e-03,
        -6.7093e-04,  2.6997e-03,  7.0680e-04, -5.0599e-04], device='cuda:0')
tensor([ 1.9831e-03,  6.9053e-04,  1.2846e-03,

In [17]:
import torch
from torch.nn.parameter import Parameter

# Creating a running mean parameter
running_mean = Parameter(torch.tensor([1.0, 2.0, 3.0]))

# Accessing the entire Parameter object
print("Original Parameter object:")
print(running_mean)

# Modifying the running mean using in-place operation on running_mean.data
running_mean.data += 1.0

# Accessing the modified running mean
print("\nModified running mean using running_mean.data:")
print(running_mean)


Original Parameter object:
Parameter containing:
tensor([1., 2., 3.], requires_grad=True)


RuntimeError: a leaf Variable that requires grad is being used in an in-place operation.

In [22]:
# load pickle file
import pickle
import torch
import numpy as np
import matplotlib.pyplot as plt

with open('./Figure2/b/top1_acc_list.pkl', 'rb') as f:
    top1_acc_list = pickle.load(f)
with open('./Figure2/b/top5_acc_list.pkl', 'rb') as f:
    top5_acc_list = pickle.load(f)
    
print(top1_acc_list)
print(top5_acc_list)

{0: [91.9, 91.9, 91.9, 91.79, 91.85, 91.54]}
{0: [99.42, 99.42, 99.43, 99.42, 99.38, 99.31]}
