In [1]:
import torchvision.models as models


import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets

from models import model_loader
from data.dataloaders import imagenet

In [2]:
# Build the ImageNet loaders through the project helper. The third return value is
# bound to num_c (presumably the number of classes — confirm against data.dataloaders).
train_loader, val_loader, num_c = imagenet()

Using augmented IMAGENET.


In [3]:
# Peek at the first batch only, to check the tensor shapes the loader yields.
for X, y in train_loader:
    print(X.shape)
    print(y.shape)
    break

torch.Size([100, 3, 224, 224])
torch.Size([100])


In [2]:
# The GPU index is machine-specific: keep the preferred card in one place and
# fall back to the CPU when that card does not exist, so the notebook still
# runs on hosts with fewer GPUs (device_count() is 0 when CUDA is unavailable).
GPU_INDEX = 6
device = 'cuda:%d' % GPU_INDEX if torch.cuda.device_count() > GPU_INDEX else 'cpu'

In [4]:
# Instantiate the network through the project's model_loader helper.
resnet18 = model_loader()

# Move it to the selected device; as the cell's last expression this also
# displays the module tree.
resnet18.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [5]:
def compute_percentile(param, ratio):
    """Return the magnitude found at a given fraction of ``param``'s sorted entries.

    The absolute values of ``param`` are flattened and sorted in ascending
    order; the value at position ``int(ratio * numel)`` is returned. The
    computation runs under ``torch.no_grad()``.

    Args:
        param: Tensor (or ``nn.Parameter``) whose magnitudes are ranked.
        ratio: Fraction in ``[0, 1]`` selecting the position in the sorted
            magnitudes. ``1.0`` selects the largest magnitude.

    Returns:
        A 0-dim tensor holding the selected magnitude.

    Raises:
        ValueError: If ``ratio`` lies outside ``[0, 1]`` or ``param`` has no
            elements.
    """
    if not 0 <= ratio <= 1:
        raise ValueError('ratio must be within [0, 1], got %r' % (ratio,))

    with torch.no_grad():
        # flatten() also handles non-contiguous tensors, where view(-1) raises.
        magnitudes = param.abs().flatten()
        count = magnitudes.shape[0]
        if count == 0:
            raise ValueError('cannot compute a percentile of an empty tensor')
        # int(ratio * count) equals count when ratio == 1.0, which is one past
        # the last valid position; clamp so the largest magnitude is returned.
        index = min(int(ratio * count), count - 1)
        lambda_ = magnitudes.sort()[0][index]
    return lambda_

def get_mask(param, ratio):
    """Build a 0/1 mask that drops the smallest-magnitude entries of ``param``.

    The threshold comes from ``compute_percentile(param, ratio)``; every entry
    whose absolute value is less than or equal to that threshold gets a 0 in
    the mask, all other entries get a 1.

    Args:
        param: Tensor (or ``nn.Parameter``) to build the mask for.
        ratio: Fraction forwarded to ``compute_percentile``.

    Returns:
        Tuple ``(mask, threshold)``: ``mask`` is created with
        ``torch.ones_like(param)`` and ``threshold`` is the value returned by
        ``compute_percentile``.
    """
    threshold = compute_percentile(param, ratio)
    keep = torch.ones_like(param)
    with torch.no_grad():
        # Entries equal to the threshold are dropped as well (<=, not <).
        below_or_equal = param.abs() <= threshold
        keep.masked_fill_(below_or_equal, 0)
    return keep, threshold
        
def compute_module(module, ratio):
    """Compute the pruning mask and threshold for a module's weight parameter.

    Every parameter of ``module`` whose name contains ``'weight'`` is passed to
    ``get_mask``; if several parameters match, the result for the last one
    visited is returned.

    Args:
        module: ``torch.nn.Module`` to inspect.
        ratio: Fraction forwarded to ``get_mask``.

    Returns:
        Tuple ``(mask, lamb_)`` as returned by ``get_mask``.

    Raises:
        ValueError: If ``module`` has no parameter whose name contains
            ``'weight'`` (previously this surfaced as an UnboundLocalError at
            the return statement).
    """
    mask = lamb_ = None
    found_weight = False
    for name, param in module.named_parameters():
        if 'weight' in name:
            mask, lamb_ = get_mask(param, ratio)
            found_weight = True
    if not found_weight:
        raise ValueError(
            "module %s has no parameter with 'weight' in its name"
            % type(module).__name__
        )
    return mask, lamb_

def compute_mask(model, ratio):
    """Collect pruning masks and thresholds for every Conv2d layer of ``model``.

    Walks ``model.named_modules()``. Each ``torch.nn.Conv2d`` is handed to
    ``compute_module`` and its results are stored under the module's name.
    ``torch.nn.Linear`` layers are only reported by printing ``Linear``; no
    mask is computed for them.

    Args:
        model: ``torch.nn.Module`` to scan.
        ratio: Fraction forwarded to ``compute_module``.

    Returns:
        Tuple ``(masks, thresholds)`` of dicts keyed by module name.
    """
    masks, thresholds = {}, {}
    for layer_name, layer in model.named_modules():
        if isinstance(layer, torch.nn.Conv2d):
            layer_mask, layer_threshold = compute_module(layer, ratio)
            masks[layer_name] = layer_mask
            thresholds[layer_name] = layer_threshold

        if isinstance(layer, torch.nn.Linear):
            print('Linear')
    return masks, thresholds


def regularize_model(named_mask, model):
    """Multiply the weights of every Conv2d layer in ``model`` by its mask.

    For each ``torch.nn.Conv2d`` found by ``model.named_modules()``, every
    parameter whose name contains ``'weight'`` has its ``.data`` replaced by
    the element-wise product with ``named_mask[<module name>]``. Runs under
    ``torch.no_grad()`` and returns nothing.

    Args:
        named_mask: Mapping from module name to mask tensor.
        model: ``torch.nn.Module`` whose convolution weights are masked.
    """
    with torch.no_grad():
        conv_layers = (
            (layer_name, layer)
            for layer_name, layer in model.named_modules()
            if isinstance(layer, torch.nn.Conv2d)
        )
        for layer_name, layer in conv_layers:
            for param_name, weight in layer.named_parameters():
                if 'weight' not in param_name:
                    continue
                weight.data = weight.data * named_mask[layer_name]


# The masks are applied in a later cell, after compute_mask has produced `mask`;
# calling regularize_model here would raise NameError on a fresh kernel.

In [6]:
# Fraction handed down to compute_percentile as the cut-off position among the
# sorted weight magnitudes of each layer.
ratio = 0.7

# Per-layer masks and thresholds for every Conv2d in the network, keyed by module name.
mask, lamb = compute_mask(resnet18, ratio)

Linear


In [7]:
# One kernel slice (index [0, 0]) of the conv1 mask: 1 marks a kept weight, 0 a dropped one.
mask['conv1'][0,0]

tensor([[0., 0., 0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 1., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 1., 0., 0., 0., 0.]], device='cuda:6')

In [9]:
# The same [0, 0] kernel slice of the conv1 weights, to compare against the mask.
resnet18.conv1.weight[0,0]

tensor([[-0.0050,  0.0199, -0.0189, -0.0287, -0.0151, -0.0066, -0.0025],
        [ 0.0063,  0.0560, -0.0092,  0.0180, -0.0158, -0.0100, -0.0324],
        [-0.0376, -0.0235, -0.0001, -0.0036, -0.0137, -0.0066,  0.0132],
        [ 0.0362,  0.0020,  0.0419, -0.0630, -0.0050,  0.0228,  0.0120],
        [-0.0113,  0.0153, -0.0064,  0.0068,  0.0119,  0.0197, -0.0072],
        [ 0.0015,  0.0105, -0.0006, -0.0177,  0.0173, -0.0294,  0.0094],
        [ 0.0078, -0.0062, -0.0267,  0.0082, -0.0142, -0.0167, -0.0137]],
       device='cuda:6', grad_fn=<SelectBackward>)

In [10]:
# Apply the masks: Conv2d weights are multiplied in place by their per-layer mask.
regularize_model(mask, resnet18)

In [11]:
# Inspect the [0, 0] kernel slice of the conv1 weights in their current state.
resnet18.conv1.weight[0,0]

tensor([[-0.0000,  0.0000, -0.0000, -0.0287, -0.0000, -0.0000, -0.0000],
        [ 0.0000,  0.0560, -0.0000,  0.0000, -0.0000, -0.0000, -0.0324],
        [-0.0376, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,  0.0000],
        [ 0.0362,  0.0000,  0.0419, -0.0630, -0.0000,  0.0000,  0.0000],
        [-0.0000,  0.0000, -0.0000,  0.0000,  0.0000,  0.0000, -0.0000],
        [ 0.0000,  0.0000, -0.0000, -0.0000,  0.0000, -0.0294,  0.0000],
        [ 0.0000, -0.0000, -0.0267,  0.0000, -0.0000, -0.0000, -0.0000]],
       device='cuda:6', grad_fn=<SelectBackward>)

In [9]:
# Walk the module tree, printing each name, and stop once the target layer is reached.
for name, module in resnet18.named_modules():
    print(name)
    if name == 'layer1.1.conv2':
        break

# `module` is whatever the loop above stopped on; list the names of its parameters.
for name, param in module.named_parameters():
    print(name)


conv1
bn1
relu
maxpool
layer1
layer1.0
layer1.0.conv1
layer1.0.bn1
layer1.0.relu
layer1.0.conv2
layer1.0.bn2
layer1.1
layer1.1.conv1
layer1.1.bn1
layer1.1.relu
layer1.1.conv2
weight


In [14]:
# Print every parameter name of `module`, plus the shape of each weight tensor.
# The size print sits inside the `if`: an empty `if` body is a syntax error.
for name, param in module.named_parameters():
    print(name)
    if 'weight' in name:
        print(param.size())

weight
torch.Size([64, 3, 7, 7])


In [26]:
# NOTE(review): no cell visible in this notebook reads sparse_ratio; presumably a
# leftover or consumed elsewhere — confirm before relying on it.
sparse_ratio = 0.3

In [31]:
# `mask` is the dict returned by compute_mask (module name -> mask tensor), so
# the shape has to be read from one of its entries rather than from the dict.
mask['conv1'].shape

torch.Size([64, 3, 7, 7])

In [None]:
# Run the training script as a shell command from within the notebook.
!python train_resnet.py