<a href="https://colab.research.google.com/github/Rohith-Rongali/IMP/blob/main/pruning_puzzles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
import torch.nn.utils.prune as prune
import torch.nn.functional as F

In [2]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
class DNN(nn.Module):
    """Fully connected ReLU network with a linear output head.

    The network applies ``depth`` hidden ``nn.Linear`` layers of size
    ``width``, each followed by a ReLU, and then a final ``nn.Linear``
    mapping to ``dim_out`` (no activation on the output).

    Args:
        dim_in: Number of input features.
        dim_out: Number of output features.
        width: Number of units in every hidden layer.
        depth: Number of hidden layers. With ``depth == 0`` there are no
            hidden layers and the model is a single linear map from
            ``dim_in`` to ``dim_out``.
    """

    def __init__(self, dim_in, dim_out, width, depth):
        super().__init__()
        self.depth = depth
        # Only the first hidden layer sees the raw input; the rest are width -> width.
        self.layers = nn.ModuleList(
            [nn.Linear(dim_in if i == 0 else width, width) for i in range(self.depth)]
        )
        # Without hidden layers the head receives the raw input, so it must be
        # sized from dim_in rather than width; otherwise forward() fails with a
        # shape mismatch whenever dim_in != width.
        head_in = width if len(self.layers) > 0 else dim_in
        self.output_layer = nn.Linear(head_in, dim_out)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the network output for a batch ``x`` whose last dim is ``dim_in``."""
        for layer in self.layers:
            x = self.relu(layer(x))
        return self.output_layer(x)
# Small demo network: 4 inputs, 2 outputs, 3 hidden layers of 10 units each.
model = DNN(dim_in=4, dim_out=2, width=10, depth=3)
model = model.to(device)

In [3]:
# Before pruning: list the first hidden layer's parameters by name.
list(model.layers[0].named_parameters())

[('weight',
  Parameter containing:
  tensor([[-0.4045,  0.0129, -0.3640, -0.3780],
          [-0.3609,  0.0471, -0.3248,  0.3384],
          [-0.1249, -0.1702, -0.2273,  0.4971],
          [-0.2657, -0.0625,  0.2665,  0.1415],
          [ 0.3374, -0.1133,  0.1452,  0.1545],
          [ 0.2082, -0.3209, -0.2297, -0.4333],
          [-0.1745, -0.0883, -0.1926,  0.2258],
          [ 0.2431, -0.3264,  0.3408, -0.0567],
          [-0.2632,  0.4146,  0.2632, -0.0121],
          [-0.1269, -0.1803,  0.2200, -0.3744]], device='cuda:0',
         requires_grad=True)),
 ('bias',
  Parameter containing:
  tensor([ 0.4848, -0.0524, -0.0181, -0.3861, -0.2923, -0.1353, -0.0653,  0.1338,
           0.3049, -0.4887], device='cuda:0', requires_grad=True))]

In [4]:
# Before pruning: list the first hidden layer's buffers by name.
list(model.layers[0].named_buffers())

[]

In [5]:
# Randomly prune individual entries of the first hidden layer's "weight"
# tensor; amount=0.3 selects 30% of the entries.
prune.random_unstructured(model.layers[0], name="weight", amount=0.3)

Linear(in_features=4, out_features=10, bias=True)

In [6]:
# After pruning: list the same layer's parameters again to see how pruning
# re-registered them.
list(model.layers[0].named_parameters())

[('bias',
  Parameter containing:
  tensor([ 0.4848, -0.0524, -0.0181, -0.3861, -0.2923, -0.1353, -0.0653,  0.1338,
           0.3049, -0.4887], device='cuda:0', requires_grad=True)),
 ('weight_orig',
  Parameter containing:
  tensor([[-0.4045,  0.0129, -0.3640, -0.3780],
          [-0.3609,  0.0471, -0.3248,  0.3384],
          [-0.1249, -0.1702, -0.2273,  0.4971],
          [-0.2657, -0.0625,  0.2665,  0.1415],
          [ 0.3374, -0.1133,  0.1452,  0.1545],
          [ 0.2082, -0.3209, -0.2297, -0.4333],
          [-0.1745, -0.0883, -0.1926,  0.2258],
          [ 0.2431, -0.3264,  0.3408, -0.0567],
          [-0.2632,  0.4146,  0.2632, -0.0121],
          [-0.1269, -0.1803,  0.2200, -0.3744]], device='cuda:0',
         requires_grad=True))]

In [7]:
# After pruning: list the same layer's buffers again.
list(model.layers[0].named_buffers())

[('weight_mask',
  tensor([[1., 1., 1., 0.],
          [1., 0., 0., 1.],
          [1., 0., 1., 0.],
          [1., 0., 1., 0.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 0.],
          [0., 0., 0., 1.],
          [1., 1., 1., 1.],
          [1., 0., 1., 1.]], device='cuda:0'))]

In [8]:
# Display the first hidden layer's `weight` attribute after pruning.
model.layers[0].weight

tensor([[-0.4045,  0.0129, -0.3640, -0.0000],
        [-0.3609,  0.0000, -0.0000,  0.3384],
        [-0.1249, -0.0000, -0.2273,  0.0000],
        [-0.2657, -0.0000,  0.2665,  0.0000],
        [ 0.3374, -0.1133,  0.1452,  0.1545],
        [ 0.2082, -0.3209, -0.2297, -0.4333],
        [-0.1745, -0.0883, -0.1926,  0.0000],
        [ 0.0000, -0.0000,  0.0000, -0.0567],
        [-0.2632,  0.4146,  0.2632, -0.0121],
        [-0.1269, -0.0000,  0.2200, -0.3744]], device='cuda:0',
       grad_fn=<MulBackward0>)

In [17]:
# Keep a handle on the tensor behind the first hidden layer's `weight_orig`.
# NOTE(review): `.data` presumably aliases the parameter's storage rather than
# copying it; if a frozen snapshot is intended, confirm and switch to
# `.detach().clone()`.
w_o= model.layers[0].weight_orig.data

In [22]:
# Run one forward/backward pass on random inputs and targets so the gradients
# reaching the pruned layer can be inspected.
# Clear stale gradients first: .backward() accumulates into .grad, so without
# this, re-running the cell would add to the previous run's gradients.
model.zero_grad()
y = model(torch.randn(2, 4).to(device))
# Mean-squared error against a random target (default "mean" reduction).
l = F.mse_loss(y, torch.randn(2, 2).to(device))
l.backward()

In [23]:
# Display the gradient stored on the first hidden layer's `weight_orig`.
model.layers[0].weight_orig.grad

tensor([[ 0.0053, -0.0051,  0.0068,  0.0000],
        [-0.0049,  0.0000,  0.0000, -0.0028],
        [-0.0205,  0.0000,  0.0024,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [-0.0222,  0.0195, -0.0142, -0.0064],
        [-0.0009,  0.0003,  0.0015,  0.0008],
        [-0.0140,  0.0075, -0.0036,  0.0000],
        [ 0.0000,  0.0000,  0.0000, -0.0074],
        [-0.0018,  0.0022, -0.0025, -0.0015],
        [ 0.0000,  0.0000,  0.0000,  0.0000]], device='cuda:0')

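The gradient of `weight_orig` is zero at every pruned position (where `weight_mask` is 0), because the forward pass uses the masked product shown above as `weight` (note its `grad_fn=<MulBackward0>`). Rows whose gradient is entirely zero even at unmasked positions are presumably hidden units whose ReLU was inactive for this batch.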

In [None]:
# Display the first hidden layer's `weight_orig` parameter.
model.layers[0].weight_orig

In [None]:
# Find the forward pre-hook that pruning registered for the "weight" tensor.
# next(..., None) leaves `hook` as None when nothing matches, instead of
# leaving it undefined or silently bound to the last, unrelated hook.
# getattr() skips hooks that are not pruning methods and so have no
# `_tensor_name` attribute.
hook = next(
    (
        candidate
        for candidate in model.layers[0]._forward_pre_hooks.values()
        if getattr(candidate, "_tensor_name", None) == "weight"
    ),
    None,
)

# Print the pruning history as a list. Only a PruningContainer (created when
# pruning is applied more than once to the same tensor) is iterable; a single
# pruning method is not, so wrap it in a one-element list.
if hook is None:
    print([])
elif isinstance(hook, prune.PruningContainer):
    print(list(hook))
else:
    print([hook])

[<torch.nn.utils.prune.RandomUnstructured object at 0x78f836e9e590>, <torch.nn.utils.prune.LnStructured object at 0x78f836e9e5f0>]


In [20]:
# Display the output layer's `weight` before it is pruned.
model.output_layer.weight

Parameter containing:
tensor([[ 0.1902, -0.2616, -0.3089, -0.0460,  0.0114, -0.0508,  0.1597,  0.2594,
         -0.2004, -0.0994],
        [ 0.0197, -0.2390,  0.1228, -0.0423,  0.0973,  0.2530, -0.2046, -0.1634,
          0.2472,  0.0101]], requires_grad=True)

In [21]:
# Apply unstructured L1 pruning to the output layer's "weight" tensor with
# amount=0.1.
prune.l1_unstructured(model.output_layer, name="weight", amount=0.1)

Linear(in_features=10, out_features=2, bias=True)

In [24]:
# After pruning: list the output layer's parameters by name.
list(model.output_layer.named_parameters())

[('bias',
  Parameter containing:
  tensor([-0.0364, -0.0301], requires_grad=True)),
 ('weight_orig',
  Parameter containing:
  tensor([[ 0.1902, -0.2616, -0.3089, -0.0460,  0.0114, -0.0508,  0.1597,  0.2594,
           -0.2004, -0.0994],
          [ 0.0197, -0.2390,  0.1228, -0.0423,  0.0973,  0.2530, -0.2046, -0.1634,
            0.2472,  0.0101]], requires_grad=True))]