In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from mcunet.tinynas.mnasnet.searchspace import layer_choices

In [1]:
import numpy as np

In [4]:
array = np.array(
    np.meshgrid(
        layer_choices,
        layer_choices,
        layer_choices,
        layer_choices,
        layer_choices,
        layer_choices,
        layer_choices
    )
).T.reshape(-1, 7)

In [8]:
array.size

15309

In [1]:
from mcunet.tinynas.mnasnet.searchspace import sample_model, to_json

In [3]:
rng = np.random.default_rng()
model = sample_model(rng, 42)
to_json(model)

'{"n_classes": 42, "width_mult": 1.0, "resolution": 224, "dropout": 0.2, "blocks": [{"n_layers": 1, "in_channels": 32, "out_channels": 16, "first_stride": 1, "expand_ratio": 1, "conv_op": 0, "kernel_size": 7, "se_ratio": 0, "skip_op": 0}, {"n_layers": 3, "in_channels": 16, "out_channels": 24, "first_stride": 2, "expand_ratio": 6, "conv_op": 0, "kernel_size": 5, "se_ratio": 0.25, "skip_op": 1}, {"n_layers": 2, "in_channels": 24, "out_channels": 24, "first_stride": 2, "expand_ratio": 6, "conv_op": 1, "kernel_size": 5, "se_ratio": 0.25, "skip_op": 0}, {"n_layers": 4, "in_channels": 24, "out_channels": 64, "first_stride": 2, "expand_ratio": 6, "conv_op": 1, "kernel_size": 7, "se_ratio": 0.25, "skip_op": 1}, {"n_layers": 2, "in_channels": 64, "out_channels": 72, "first_stride": 1, "expand_ratio": 6, "conv_op": 0, "kernel_size": 5, "se_ratio": 0, "skip_op": 0}, {"n_layers": 3, "in_channels": 72, "out_channels": 200, "first_stride": 2, "expand_ratio": 6, "conv_op": 1, "kernel_size": 7, "se_ra

In [25]:
from mcunet.tinynas.proxylessnas.model import SuperProxylessNAS

In [26]:
super_nas = SuperProxylessNAS(1.0, 224, 43)

In [3]:
super_nas._switch.active = True

In [28]:
tensor = torch.randn(1, 3, 224, 224)
super_nas(tensor)

tensor([[-0.0464,  0.0339,  0.1892, -0.0848,  0.0913, -0.2680, -0.0901, -0.1498,
          0.1336, -0.1449, -0.1216, -0.1886, -0.3643, -0.0906, -0.1311,  0.1891,
         -0.3558, -0.0301, -0.0557, -0.2962,  0.0455,  0.0799,  0.2313, -0.1290,
         -0.1790,  0.1736,  0.1672,  0.1642, -0.1127, -0.0790,  0.2817,  0.1063,
         -0.1393, -0.0115, -0.1056,  0.0272, -0.0203,  0.2168,  0.0941, -0.0256,
         -0.0330, -0.1169, -0.0450]], grad_fn=<AddmmBackward0>)

In [3]:
import copy

import torch
import torch.nn as nn

from mcunet.tinynas.proxylessnas.old import *
from mcunet.tinynas.proxylessnas.layers import MixedOp, MixedOpSwitch

In [4]:
def build_candidate_ops(candidate_ops, in_channels, out_channels, stride, ops_order):
    if candidate_ops is None:
        raise ValueError('please specify a candidate set')

    name2ops = {
        'Identity': lambda in_C, out_C, S: IdentityLayer(in_C, out_C, ops_order=ops_order),
        'Zero': lambda in_C, out_C, S: ZeroLayer(stride=S),
    }
    # add MBConv layers
    name2ops.update({
        '3x3_MBConv1': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 3, S, 1),
        '3x3_MBConv2': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 3, S, 2),
        '3x3_MBConv3': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 3, S, 3),
        '3x3_MBConv4': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 3, S, 4),
        '3x3_MBConv5': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 3, S, 5),
        '3x3_MBConv6': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 3, S, 6),
        #######################################################################################
        '5x5_MBConv1': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 5, S, 1),
        '5x5_MBConv2': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 5, S, 2),
        '5x5_MBConv3': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 5, S, 3),
        '5x5_MBConv4': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 5, S, 4),
        '5x5_MBConv5': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 5, S, 5),
        '5x5_MBConv6': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 5, S, 6),
        #######################################################################################
        '7x7_MBConv1': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 7, S, 1),
        '7x7_MBConv2': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 7, S, 2),
        '7x7_MBConv3': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 7, S, 3),
        '7x7_MBConv4': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 7, S, 4),
        '7x7_MBConv5': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 7, S, 5),
        '7x7_MBConv6': lambda in_C, out_C, S: MBInvertedConvLayer(in_C, out_C, 7, S, 6),
    })

    return [
        name2ops[name](in_channels, out_channels, stride) for name in candidate_ops
    ]

In [5]:
conv_candidates = [
    '3x3_MBConv3', '3x3_MBConv6',
    '5x5_MBConv3', '5x5_MBConv6',
    '7x7_MBConv3', '7x7_MBConv6',
]

candidates = build_candidate_ops(conv_candidates, 1, 1, 1, "weight_bn_act")

for c in candidates:
    for m in c.modules():
        if isinstance(m, (nn.Conv2d, nn.BatchNorm2d, nn.Linear)):
            nn.init.ones_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)

candidates1 = copy.deepcopy(candidates)
candidates2 = copy.deepcopy(candidates)

In [6]:
old_mixed = MixedEdge(candidates1)

switch = MixedOpSwitch()
new_mixed = MixedOp(candidates2, switch)

new_mixed.arch_params = copy.deepcopy(old_mixed.AP_path_alpha)
new_mixed.binary_gates = copy.deepcopy(old_mixed.AP_path_wb)

In [7]:
old_mixed.AP_path_alpha == new_mixed.arch_params, old_mixed.AP_path_wb == new_mixed.binary_gates

(tensor([True, True, True, True, True, True]),
 tensor([True, True, True, True, True, True]))

In [8]:
old_mixed.active_index, new_mixed.active_index

([0], 0)

In [9]:
old_mixed.inactive_index, new_mixed.inactive_index

(None, [1, 2, 3, 4, 5])

In [10]:
old_mixed.log_prob, new_mixed.log_prob

(None, None)

In [11]:
old_mixed.current_prob_over_ops, new_mixed.current_prob_over_ops

(None, None)

In [12]:
old_mixed.probs_over_ops == new_mixed.softmax_arch_params()

tensor([True, True, True, True, True, True])

In [13]:
old_mixed.chosen_index, (new_mixed.chosen_index(), new_mixed.softmax_arch_params()[new_mixed.chosen_index()])

((0, 1.0), (0, tensor(1., grad_fn=<SelectBackward0>)))

In [14]:
old_mixed.chosen_op

MBInvertedConvLayer(
  (inverted_bottleneck): Sequential(
    (conv): Conv2d(1, 3, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): ReLU6(inplace=True)
  )
  (depth_conv): Sequential(
    (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3, bias=False)
    (bn): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): ReLU6(inplace=True)
  )
  (point_linear): Sequential(
    (conv): Conv2d(3, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

In [15]:
new_mixed.chosen_candidate()

MBInvertedConvLayer(
  (inverted_bottleneck): Sequential(
    (conv): Conv2d(1, 3, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): ReLU6(inplace=True)
  )
  (depth_conv): Sequential(
    (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3, bias=False)
    (bn): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): ReLU6(inplace=True)
  )
  (point_linear): Sequential(
    (conv): Conv2d(3, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

In [16]:
old_mixed.entropy() == new_mixed.entropy()

tensor(True)

In [17]:
tensor = torch.randn(1, 1, 8, 8)

In [18]:
old_forward = old_mixed(tensor)

In [19]:
new_forward = new_mixed(tensor)

In [20]:
old_forward == new_forward

tensor([[[[True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True]]]])

In [21]:
MixedEdge.MODE = "full_v2"
switch.active = True

# reset_binary_gates
old_mixed.binarize()
new_mixed.binarize()

In [22]:
old_forward = old_mixed(tensor)
new_forward = new_mixed(tensor)

In [23]:
old_forward == new_forward

tensor([[[[True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True],
          [True, True, True, True, True, True, True, True]]]])

In [34]:
from mcunet.tinynas.proxylessnas.old import *
from mcunet.tinynas.proxylessnas.layers import MixedOp, MixedOpSwitch

In [18]:
from collections.abc import Iterator

def init(net: nn.Module) -> None:
    for m in net.modules():
        if isinstance(m, (nn.Conv2d, nn.BatchNorm2d, nn.Linear)):
            nn.init.ones_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)


def parameters(
    named_parameters: list[tuple[str, Iterator]], exclude: list[str]
) -> Iterator[nn.Parameter]:
    for name, param in named_parameters:
        if name not in exclude:
            yield param

conv_candidates = [
#  "3x3_MBConv1"
   "3x3_MBConv3", "3x3_MBConv6",
   "5x5_MBConv3", "5x5_MBConv6",
   "7x7_MBConv3", "7x7_MBConv6",
]

candidates = build_candidate_ops(conv_candidates, 1, 1, 1, "weight_bn_act")

candidates1 = copy.deepcopy(candidates)
candidates2 = copy.deepcopy(candidates)

MixedEdge.MODE = None
mixed_old = MixedEdge(candidates1)

switch = MixedOpSwitch()
mixed_new = MixedOp(candidates2, switch)

mixed_new.arch_params = copy.deepcopy(mixed_old.AP_path_alpha)
mixed_new.binary_gates = copy.deepcopy(mixed_old.AP_path_wb)

image = torch.randn(1, 1, 8, 8)
label = torch.randn(1, 1, 8, 8)

net_old = nn.Sequential(
    nn.Conv2d(1, 1, 1),
    mixed_old
)

net_new = nn.Sequential(
    nn.Conv2d(1, 1, 1),
    mixed_new
)

init(net_old)
init(net_new)

criterion_old = nn.CrossEntropyLoss()
criterion_new = nn.CrossEntropyLoss()
adam_old = torch.optim.Adam(parameters(net_old.named_parameters(), ["AP_path_alpha", "AP_path_wb"]))
adam_new = torch.optim.Adam(parameters(net_new.named_parameters(), ["arch_params", "binary_gates"]))

_ = net_old.train()
_ = net_new.train()

In [19]:
MixedEdge.MODE = "full_v2"
switch.active = True

# reset_binary_gates
mixed_old.binarize()
mixed_new.binarize()

In [20]:
output_old = net_old(image)
loss_old = criterion_old(output_old, label)
net_old.zero_grad()
loss_old.backward()
mixed_old.set_arch_param_grad()
adam_old.step()

output_new = net_new(image)
loss_new = criterion_new(output_new, label)
net_new.zero_grad()
loss_new.backward()
mixed_new.set_arch_param_grad()
adam_new.step()

In [37]:
torch.allclose(output_old, output_new)

True

In [9]:
loss_old == loss_new

tensor(True)

In [10]:
mixed_old.AP_path_alpha == mixed_new.arch_params

tensor([True, True, True, True, True, True])

In [11]:
mixed_old.AP_path_wb == mixed_new.binary_gates

tensor([ True,  True,  True, False, False,  True])

In [12]:
mixed_old.AP_path_alpha.grad == mixed_new.arch_params.grad

tensor([True, True, True, True, True, True])

In [13]:
mixed_old.AP_path_wb.grad == mixed_new.binary_gates.grad

tensor([True, True, True, True, True, True])