In [2]:
# https://www.youtube.com/watch?v=fR_0o25kigM&list=WL&index=6&t=2070s&ab_channel=AladdinPersson

import torch
import torch.nn as nn
from math import ceil
from torchinfo import summary
import quant_module as qm
# import quant_module as qm

__all__ = ['mixeffnet_b0_w1234a234', 'mixeffnet_b0_w1234a234_100', 'mixeffnet_b0_w248a248_chan',"mixeffnet_b0_w2468a2468_100",
           'mixeffnet_b3_w2468a2468_100']

base_model = [
    # expand_ratio, channels, repeats, stride, kernel_size
    [1, 16, 1, 1, 3],
    [6, 24, 2, 2, 3],
    [6, 40, 2, 2, 5],
    [6, 80, 3, 2, 3],
    [6, 112, 3, 1, 5],
    [6, 192, 4, 2, 5],
    [6, 320, 1, 1, 3],
]

phi_values = {
    # tuple of: (phi_value, resolution, drop_rate)
    "b0": (0, 224, 0.2),  # alpha, beta, gamma, depth = alpha ** phi
    "b1": (0.5, 240, 0.2),
    "b2": (1, 260, 0.3),
    "b3": (2, 300, 0.3),
    "b4": (3, 380, 0.4),
    "b5": (4, 456, 0.4),
    "b6": (5, 528, 0.5),
    "b7": (6, 600, 0.5),
}


class BasicCNNBlock(nn.Module):

    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicCNNBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
        self.silu = nn.SiLU()  # SiLU <-> Swish
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return self.silu(x)

class CNNBlock(nn.Module):
    def __init__(
        self, conv_func, in_channels, out_channels, 
        kernel_size, stride, padding, groups=1, **kwargs
    ):
        super(CNNBlock, self).__init__()
        self.cnn = conv_func(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
            **kwargs,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.silu = nn.SiLU()  # SiLU <-> Swish

    def forward(self, x):
        out = self.cnn(x)
        out = self.silu(self.bn(out))
        return out
    # def forward(self, x):
    #     return self.silu(self.bn(self.cnn(x)))


class SqueezeExcitation(nn.Module):
    def __init__(self, conv_func, in_channels, reduced_dim, **kwargs):
        super(SqueezeExcitation, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),  # C x H x W -> C x 1 x 1
            conv_func(
                in_channels, reduced_dim, kernel_size=1, bias=False, **kwargs
            ),  # C x 1 x 1 -> C_reduced x 1 x 1
            nn.SiLU(),  # SiLU <-> Swish
            conv_func(
                reduced_dim, in_channels, kernel_size=1, bias=False, **kwargs
            ),  # C_reduced x 1 x 1 -> C x 1 x 1
            nn.Sigmoid(),
        )

    def forward(self, x):
        return x * self.se(x)


class InvertedResidualBlock(nn.Module):
    def __init__(
            self,
            conv_func,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            expand_ratio,
            reduction=4,  # squeeze excitation
            survival_prob=0.8,  # for stochastic depth
             **kwargs,
    ):
        super(InvertedResidualBlock, self).__init__()
        self.survival_prob = survival_prob
        self.use_residual = in_channels == out_channels and stride == 1
        hidden_dim = in_channels * expand_ratio
        self.expand = in_channels != hidden_dim
        reduced_dim = int(in_channels / reduction)

        if self.expand:
            self.expand_conv = CNNBlock(
                conv_func, in_channels, hidden_dim, kernel_size=3, stride=1, padding=1, **kwargs,
            )

        self.conv = nn.Sequential(
            CNNBlock(
                conv_func, hidden_dim, hidden_dim, kernel_size, stride, padding, groups=hidden_dim, **kwargs,
            ),
            SqueezeExcitation(conv_func, hidden_dim, reduced_dim, **kwargs,),
            conv_func(hidden_dim, out_channels, kernel_size=1, bias=False, **kwargs,),
            nn.BatchNorm2d(out_channels,),
        )

    def stochastic_depth(self, x):
        if not self.training:
            return x

        binary_tensor = torch.rand(x.shape[0], 1, 1, 1, device=x.device) < self.survival_prob
        return torch.div(x, self.survival_prob) * binary_tensor
    
    def forward(self, inputs):
        x = self.expand_conv(inputs) if self.expand else inputs
        
        if self.use_residual:
            # return self.stochastic_depth(self.conv(x)) + inputs
            if self.expand:
                return self.stochastic_depth(self.conv(x)) + self.expand_conv.cnn.quant_skip
            else:
                return self.stochastic_depth(self.conv(x)) + self.conv[0].cnn.quant_skip
        else:
            return self.conv(x)
        



class EfficientNet(nn.Module):

    def __init__(self, conv_func, version, num_classes=1000, **kwargs):
        if 'abits' in kwargs:
            print('abits: {}'.format(kwargs['abits']))
        if 'wbits' in kwargs:
            print('wbits: {}'.format(kwargs['wbits']))
        self.conv_func = conv_func
        super(EfficientNet, self).__init__()
        width_factor, depth_factor, dropout_rate, res = self.calculate_factors(version)
        self.res = res
        last_channel = ceil(1280 * width_factor)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.features = self.create_features(conv_func, width_factor, depth_factor, last_channel, **kwargs)
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(last_channel, num_classes),
        )

    def calculate_factors(self, version, alpha=1.2, beta=1.1):
        phi, res, drop_rate = phi_values[version]
        depth_factor = alpha ** phi
        width_factor = beta ** phi
        return width_factor, depth_factor, drop_rate, res
    
    def create_features(self, conv_func, width_factor, depth_factor, last_channel, **kwargs):
        channels = int(32 * width_factor)
        features = [BasicCNNBlock(3, channels, kernel_size=3, stride=2, padding=1)]
        in_channels = channels

        for expand_ratio, channels, repeats, stride, kernel_size in base_model:
            out_channels = 4 * ceil(int(channels * width_factor) / 4)
            layers_repeats = ceil(repeats * depth_factor)

            for layer in range(layers_repeats):
                features.append(
                    InvertedResidualBlock(
                        conv_func,
                        in_channels,
                        out_channels,
                        expand_ratio=expand_ratio,
                        stride=stride if layer == 0 else 1,
                        kernel_size=kernel_size,
                        padding=kernel_size // 2,  # if k=1:pad=0, k=3:pad=1, k=5:pad=2
                         **kwargs,
                    )
                )
                in_channels = out_channels

        features.append(
            CNNBlock(conv_func, in_channels, last_channel, kernel_size=1, stride=1, padding=0,  **kwargs)
        )
        return nn.Sequential(*features)
    
    def forward(self, x):
        x = self.pool(self.features(x))
        return self.classifier(x.view(x.shape[0], -1))
    def complexity_loss(self):
        size_product = []
        loss = 0
        for m in self.modules():
            if isinstance(m, self.conv_func):
                complexity_loss, _ = m.complexity_loss()
                loss += complexity_loss
                size_product += [m.size_product]
        normalizer = size_product[0].item()
        loss /= normalizer
        return loss
    
    def split_complexity_loss(self):
        loss = 0
        layer_idx = 0
        for m in self.modules():
            if isinstance(m, self.conv_func):
                _, split_complexity_loss = m.complexity_loss()
                if layer_idx in [0, 4, 9, 19, 44, 59]:
                    loss += split_complexity_loss
                layer_idx += 1
        normalizer = 6 * 5
        loss /= normalizer
        return loss
    
    def fetch_best_arch(self):
        sum_bitops, sum_bita, sum_bitw = 0, 0, 0
        sum_mixbitops, sum_mixbita, sum_mixbitw = 0, 0, 0
        layer_idx = 0
        best_arch = None
        for m in self.modules():
            if isinstance(m, self.conv_func):
                layer_arch, bitops, bita, bitw, mixbitops, mixbita, mixbitw = m.fetch_best_arch(layer_idx)
                if best_arch is None:
                    best_arch = layer_arch
                else:
                    for key in layer_arch.keys():
                        if key not in best_arch:
                            best_arch[key] = layer_arch[key]
                        else:
                            best_arch[key].append(layer_arch[key][0])
                sum_bitops += bitops
                sum_bita += bita
                sum_bitw += bitw
                sum_mixbitops += mixbitops
                sum_mixbita += mixbita
                sum_mixbitw += mixbitw
                layer_idx += 1
        return best_arch, sum_bitops, sum_bita, sum_bitw, sum_mixbitops, sum_mixbita, sum_mixbitw



def mixeffnet_b0_w1234a234(**kwargs):
    version = "b0"
    return EfficientNet(qm.MixActivConv2d, version, num_classes=1000,
                     wbits=[1, 2, 3, 4], abits=[2, 3, 4], share_weight=True, **kwargs)

def mixeffnet_b0_w1234a234_100(**kwargs):
    version = "b0"
    return EfficientNet(qm.MixActivConv2d, version, num_classes=100,
                     wbits=[1, 2, 3, 4], abits=[2, 3, 4], share_weight=True, **kwargs)
    
def mixeffnet_b0_w2468a2468_100(**kwargs):
    version = "b0"
    return EfficientNet(qm.MixActivConv2d, version, num_classes=100,
                     wbits=[2, 4, 6, 8], abits=[2, 4, 6, 8], share_weight=True, **kwargs)

def mixeffnet_b0_w248a248_chan(**kwargs):
    version = "b0"
    return EfficientNet(qm.MixActivChanConv2d, version, num_classes=100,
                     wbits=[1, 2, 3, 4], abits=[2, 3, 4], share_weight=True, **kwargs)


def mixeffnet_b3_w2468a2468_100(**kwargs):
    version = "b3"
    return EfficientNet(qm.MixActivConv2d, version, num_classes=100,
                     wbits=[2, 4, 6, 8], abits=[2, 4, 6, 8], share_weight=True, **kwargs)
    
    
    
    # @classmethod
    # def get_image_size(cls, model_name):
    #     """Get the input image size for a given efficientnet model.

    #     Args:
    #         model_name (str): Name for efficientnet.

    #     Returns:
    #         Input image size (resolution).
    #     """
    #     cls._check_model_name_is_valid(model_name)
    #     _, _, res, _ = efficientnet_params(model_name)
    #     return res
    
    #image_size = EfficientNet.get_image_size(args.arch)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# https://github.com/kuan-wang/pytorch-mobilenet-v3/tree/master

import torch
import torch.nn as nn
import torch.nn.functional as F


__all__ = ['MobileNetV3', 'mobilenetv3']


def conv_bn(inp, oup, stride, conv_layer=nn.Conv2d, norm_layer=nn.BatchNorm2d, nlin_layer=nn.ReLU):
    return nn.Sequential(
        conv_layer(inp, oup, 3, stride, 1, bias=False),
        norm_layer(oup),
        nlin_layer(inplace=True)
    )


def conv_1x1_bn(inp, oup, conv_layer=nn.Conv2d, norm_layer=nn.BatchNorm2d, nlin_layer=nn.ReLU):
    return nn.Sequential(
        conv_layer(inp, oup, 1, 1, 0, bias=False),
        norm_layer(oup),
        nlin_layer(inplace=True)
    )


class Hswish(nn.Module):
    def __init__(self, inplace=True):
        super(Hswish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return x * F.relu6(x + 3., inplace=self.inplace) / 6.


class Hsigmoid(nn.Module):
    def __init__(self, inplace=True):
        super(Hsigmoid, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return F.relu6(x + 3., inplace=self.inplace) / 6.


class SEModule(nn.Module):
    def __init__(self, channel, reduction=4):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            Hsigmoid()
            # nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)


class Identity(nn.Module):
    def __init__(self, channel):
        super(Identity, self).__init__()

    def forward(self, x):
        return x


def make_divisible(x, divisible_by=8):
    import numpy as np
    return int(np.ceil(x * 1. / divisible_by) * divisible_by)


class MobileBottleneck(nn.Module):
    def __init__(self, inp, oup, kernel, stride, exp, se=False, nl='RE'):
        super(MobileBottleneck, self).__init__()
        assert stride in [1, 2]
        assert kernel in [3, 5]
        padding = (kernel - 1) // 2
        self.use_res_connect = stride == 1 and inp == oup

        conv_layer = nn.Conv2d
        norm_layer = nn.BatchNorm2d
        if nl == 'RE':
            nlin_layer = nn.ReLU # or ReLU6
        elif nl == 'HS':
            nlin_layer = Hswish
        else:
            raise NotImplementedError
        if se:
            SELayer = SEModule
        else:
            SELayer = Identity

        self.conv = nn.Sequential(
            # pw
            conv_layer(inp, exp, 1, 1, 0, bias=False),
            norm_layer(exp),
            nlin_layer(inplace=True),
            # dw
            conv_layer(exp, exp, kernel, stride, padding, groups=exp, bias=False),
            norm_layer(exp),
            SELayer(exp),
            nlin_layer(inplace=True),
            # pw-linear
            conv_layer(exp, oup, 1, 1, 0, bias=False),
            norm_layer(oup),
        )

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV3(nn.Module):
    def __init__(self, n_class=1000, input_size=224, dropout=0.8, mode='small', width_mult=1.0):
        super(MobileNetV3, self).__init__()
        input_channel = 16
        last_channel = 1280
        if mode == 'large':
            # refer to Table 1 in paper
            mobile_setting = [
                # k, exp, c,  se,     nl,  s,
                [3, 16,  16,  False, 'RE', 1],
                [3, 64,  24,  False, 'RE', 2],
                [3, 72,  24,  False, 'RE', 1],
                [5, 72,  40,  True,  'RE', 2],
                [5, 120, 40,  True,  'RE', 1],
                [5, 120, 40,  True,  'RE', 1],
                [3, 240, 80,  False, 'HS', 2],
                [3, 200, 80,  False, 'HS', 1],
                [3, 184, 80,  False, 'HS', 1],
                [3, 184, 80,  False, 'HS', 1],
                [3, 480, 112, True,  'HS', 1],
                [3, 672, 112, True,  'HS', 1],
                [5, 672, 160, True,  'HS', 2],
                [5, 960, 160, True,  'HS', 1],
                [5, 960, 160, True,  'HS', 1],
            ]
        elif mode == 'small':
            # refer to Table 2 in paper
            mobile_setting = [
                # k, exp, c,  se,     nl,  s,
                [3, 16,  16,  True,  'RE', 2],
                [3, 72,  24,  False, 'RE', 2],
                [3, 88,  24,  False, 'RE', 1],
                [5, 96,  40,  True,  'HS', 2],
                [5, 240, 40,  True,  'HS', 1],
                [5, 240, 40,  True,  'HS', 1],
                [5, 120, 48,  True,  'HS', 1],
                [5, 144, 48,  True,  'HS', 1],
                [5, 288, 96,  True,  'HS', 2],
                [5, 576, 96,  True,  'HS', 1],
                [5, 576, 96,  True,  'HS', 1],
            ]
        else:
            raise NotImplementedError

        # building first layer
        assert input_size % 32 == 0
        last_channel = make_divisible(last_channel * width_mult) if width_mult > 1.0 else last_channel
        self.features = [conv_bn(3, input_channel, 2, nlin_layer=Hswish)]
        self.classifier = []

        # building mobile blocks
        for k, exp, c, se, nl, s in mobile_setting:
            output_channel = make_divisible(c * width_mult)
            exp_channel = make_divisible(exp * width_mult)
            self.features.append(MobileBottleneck(input_channel, output_channel, k, s, exp_channel, se, nl))
            input_channel = output_channel

        # building last several layers
        if mode == 'large':
            last_conv = make_divisible(960 * width_mult)
            self.features.append(conv_1x1_bn(input_channel, last_conv, nlin_layer=Hswish))
            self.features.append(nn.AdaptiveAvgPool2d(1))
            self.features.append(nn.Conv2d(last_conv, last_channel, 1, 1, 0))
            self.features.append(Hswish(inplace=True))
        elif mode == 'small':
            last_conv = make_divisible(576 * width_mult)
            self.features.append(conv_1x1_bn(input_channel, last_conv, nlin_layer=Hswish))
            # self.features.append(SEModule(last_conv))  # refer to paper Table2, but I think this is a mistake
            self.features.append(nn.AdaptiveAvgPool2d(1))
            self.features.append(nn.Conv2d(last_conv, last_channel, 1, 1, 0))
            self.features.append(Hswish(inplace=True))
        else:
            raise NotImplementedError

        # make it nn.Sequential
        self.features = nn.Sequential(*self.features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),    # refer to paper section 6
            nn.Linear(last_channel, n_class),
        )

        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = x.mean(3).mean(2)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)


def mobilenetv3(pretrained=False, **kwargs):
    model = MobileNetV3(**kwargs)
    if pretrained:
        state_dict = torch.load('mobilenetv3_small_67.4.pth.tar')
        model.load_state_dict(state_dict, strict=True)
        # raise NotImplementedError
    return model


if __name__ == '__main__':
    model = mobilenetv3()
    print('mobilenetv3:\n', model)
    print('Total params: %.2fM' % (sum(p.numel() for p in net.parameters())/1000000.0))
    input_size=(1, 3, 224, 224)


    x = torch.randn(input_size)
    out = model(x)




mobilenetv3:
 MobileNetV3(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Hswish()
    )
    (1): MobileBottleneck(
      (conv): Sequential(
        (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
        (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): SEModule(
          (avg_pool): AdaptiveAvgPool2d(output_size=1)
          (fc): Sequential(
            (0): Linear(in_features=16, out_features=4, bias=False)
            (1): ReLU(inplace=True)
            (2): Linear(in_features=4, out_features=16, bias=Fa

NameError: name 'net' is not defined

In [2]:
from torchinfo import summary
model = mixeffnet_b3_w2468a2468_100()
x = summary(model=model, input_size=(1, 3, 300, 300))

abits: [2, 4, 6, 8]
wbits: [2, 4, 6, 8]


In [1]:
import torch
import torch.nn as nn
from math import ceil
from torchinfo import summary
import quant_module as qm
# 

__all__ = ['quanteffnet_w8a8_chan', 'quanteffnet_cfg', "quanteffnet_w32a32_chan", 
           "quanteffnet_w2a2_chan", "quanteffnet_w4a4_chan", "quanteffnet_w8a8", 
           "quanteffnet_cfg_2468", "quanteffnet_w4a4", "quanteffnet_w3a3", "quanteffnet_w2a2",
           "quanteffnet_cfg_2468_b3", "quanteffnet_w8a8_b3"]

base_model = [
    # expand_ratio, channels, repeats, stride, kernel_size
    [1, 16, 1, 1, 3],
    [6, 24, 2, 2, 3],
    [6, 40, 2, 2, 5],
    [6, 80, 3, 2, 3],
    [6, 112, 3, 1, 5],
    [6, 192, 4, 2, 5],
    [6, 320, 1, 1, 3],
]

phi_values = {
    # tuple of: (phi_value, resolution, drop_rate)
    "b0": (0, 224, 0.2),  # alpha, beta, gamma, depth = alpha ** phi
    "b1": (0.5, 240, 0.2),
    "b2": (1, 260, 0.3),
    "b3": (2, 300, 0.3),
    "b4": (3, 380, 0.4),
    "b5": (4, 456, 0.4),
    "b6": (5, 528, 0.5),
    "b7": (6, 600, 0.5),
}


class BasicCNNBlock(nn.Module):

    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicCNNBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
        self.silu = nn.SiLU()  # SiLU <-> Swish
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return self.silu(x)

class CNNBlock(nn.Module):
    def __init__(
        self, conv_func, in_channels, out_channels, wbit, abit,
        kernel_size, stride, padding, groups=1, **kwargs
    ):
        super(CNNBlock, self).__init__()
        self.cnn = conv_func(
            in_channels,
            out_channels,
            wbit,
            abit,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
            **kwargs,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.silu = nn.SiLU()  # SiLU <-> Swish

    def forward(self, x):
        return self.silu(self.bn(self.cnn(x)))


class SqueezeExcitation(nn.Module):
    def __init__(self, conv_func, archws, archas, in_channels, reduced_dim, **kwargs):
        super(SqueezeExcitation, self).__init__()
        assert len(archas) == 2
        assert len(archws) == 2
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),  # C x H x W -> C x 1 x 1
            conv_func(
                in_channels, reduced_dim, archws[0], archas[0], kernel_size=1, bias=False, **kwargs
            ),  # C x 1 x 1 -> C_reduced x 1 x 1
            nn.SiLU(),  # SiLU <-> Swish
            conv_func(
                reduced_dim, in_channels, archws[1], archas[1], kernel_size=1, bias=False, **kwargs
            ),  # C_reduced x 1 x 1 -> C x 1 x 1
            nn.Sigmoid(),
        )

    def forward(self, x):
        return x * self.se(x)


class InvertedResidualBlock(nn.Module):
    def __init__(
            self,
            conv_func,
            archws, 
            archas,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            expand_ratio,
            reduction=4,  # squeeze excitation
            survival_prob=0.8,  # for stochastic depth
             **kwargs,
    ):
        super(InvertedResidualBlock, self).__init__()

        i = 0
        self.survival_prob = survival_prob
        self.use_residual = in_channels == out_channels and stride == 1
        hidden_dim = in_channels * expand_ratio
        self.expand = in_channels != hidden_dim
        reduced_dim = int(in_channels / reduction)
        if self.expand:
            assert len(archas) == 5
            assert len(archws) == 5
        else:
            assert len(archas) == 4
            assert len(archws) == 4
        if self.expand:
            self.expand_conv = CNNBlock(
                conv_func, in_channels, hidden_dim, archws[i], archas[i], kernel_size=3, stride=1, padding=1, **kwargs,
            )
            i += 1

        self.conv = nn.Sequential(
            CNNBlock(
                conv_func, hidden_dim, hidden_dim, archws[i], archas[i], kernel_size, stride, padding, groups=hidden_dim, **kwargs,
            ),
            SqueezeExcitation(conv_func, archws[i+1:i+3], archas[i+1:i+3], hidden_dim, reduced_dim, **kwargs,),
            conv_func(hidden_dim, out_channels, archws[i+3], archas[i+3], kernel_size=1, bias=False, **kwargs,),
            nn.BatchNorm2d(out_channels,),
        )

    def stochastic_depth(self, x):
        if not self.training:
            return x

        binary_tensor = torch.rand(x.shape[0], 1, 1, 1, device=x.device) < self.survival_prob
        return torch.div(x, self.survival_prob) * binary_tensor
    
    def forward(self, inputs):
        x = self.expand_conv(inputs) if self.expand else inputs

        if self.use_residual:
            # return self.stochastic_depth(self.conv(x)) + inputs
            if self.expand:
                return self.stochastic_depth(self.conv(x)) + self.expand_conv.cnn.quant_skip
            else:
                return self.stochastic_depth(self.conv(x)) + self.conv[0].cnn.quant_skip
        else:
            return self.conv(x)
        



class EfficientNet(nn.Module):

    def __init__(self, conv_func, version, archws, archas, num_classes=1000, **kwargs):
        print('archas: {}'.format(archas))
        print('archws: {}'.format(archws))
        # assert len(archas) == 80
        # assert len(archws) == 80
        self.conv_func = conv_func
        super(EfficientNet, self).__init__()
        width_factor, depth_factor, dropout_rate, res = self.calculate_factors(version)
        self.res = res
        last_channel = ceil(1280 * width_factor)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.features = self.create_features(conv_func, width_factor, depth_factor, last_channel, archws, archas, **kwargs)
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(last_channel, num_classes),
        )

    def calculate_factors(self, version, alpha=1.2, beta=1.1):
        phi, res, drop_rate = phi_values[version]
        depth_factor = alpha ** phi
        width_factor = beta ** phi
        return width_factor, depth_factor, drop_rate, res
    
    def create_features(self, conv_func, width_factor, depth_factor, last_channel, archws, archas, **kwargs):
        channels = int(32 * width_factor)
        features = [BasicCNNBlock(3, channels, kernel_size=3, stride=2, padding=1)]
        in_channels = channels
        i = 0
        for expand_ratio, channels, repeats, stride, kernel_size in base_model:
            out_channels = 4 * ceil(int(channels * width_factor) / 4)
            layers_repeats = ceil(repeats * depth_factor)
            if expand_ratio == 1:
                j = 4
            else:
                j = 5
            for layer in range(layers_repeats):
                features.append(
                    InvertedResidualBlock(
                        conv_func,
                        archws[i:i+j],
                        archas[i:i+j],
                        in_channels,
                        out_channels,
                        expand_ratio=expand_ratio,
                        stride=stride if layer == 0 else 1,
                        kernel_size=kernel_size,
                        padding=kernel_size // 2,  # if k=1:pad=0, k=3:pad=1, k=5:pad=2
                         **kwargs,
                    )
                )
                in_channels = out_channels
                i += j
        # assert i == 79
        features.append(
            CNNBlock(conv_func, in_channels, last_channel, archws[j], archas[j], kernel_size=1, stride=1, padding=0,  **kwargs)
        )
        return nn.Sequential(*features)
    
    def forward(self, x):
        x = self.features(x)
        x = self.pool(x)
        return self.classifier(x.view(x.shape[0], -1))

    def fetch_arch_info(self):
        sum_bitops, sum_bita, sum_bitw = 0, 0, 0
        layer_idx = 0
        for m in self.modules():
            if isinstance(m, self.conv_func):
                size_product = m.size_product.item()
                memory_size = m.memory_size.item()
                bitops = size_product * m.abit * m.wbit
                bita = m.memory_size.item() * m.abit
                bitw = m.param_size * m.wbit
                # weight_shape = list(m.conv.weight.shape)
                # print('idx {} with shape {}, bitops: {:.3f}M * {} * {}, memory: {:.3f}K * {}, '
                #       'param: {:.3f}M * {}'.format(layer_idx, weight_shape, size_product, m.abit,
                #                                    m.wbit, memory_size, m.abit, m.param_size, m.wbit))
                sum_bitops += bitops
                sum_bita += bita
                sum_bitw += bitw
                layer_idx += 1
        return sum_bitops, sum_bita, sum_bitw


def _load_arch(arch_path, names_nbits):
    checkpoint = torch.load(arch_path)
    state_dict = checkpoint['state_dict']
    best_arch, worst_arch = {}, {}
    for name in names_nbits.keys():
        best_arch[name], worst_arch[name] = [], []
    for name, params in state_dict.items():
        name = name.split('.')[-1]
        if name in names_nbits.keys():
            alpha = params.cpu().numpy()
            assert names_nbits[name] == alpha.shape[0]
            best_arch[name].append(alpha.argmax())
            worst_arch[name].append(alpha.argmin())

    return best_arch, worst_arch
    

def quanteffnet_w8a8(arch_cfg_path, **kwargs):
    version = "b0"
    archas = [8] *80
    archws = [8] *80
    assert len(archas) == 80
    assert len(archws) == 80
    return EfficientNet(qm.QuantActivConv2d, version, archws, archas, num_classes=100, **kwargs)

def quanteffnet_w4a4(arch_cfg_path, **kwargs):
    version = "b0"
    archas = [4] *80
    archws = [4] *80
    assert len(archas) == 80
    assert len(archws) == 80
    return EfficientNet(qm.QuantActivConv2d, version, archws, archas, num_classes=100, **kwargs)

def quanteffnet_w3a3(arch_cfg_path, **kwargs):
    version = "b0"
    archas = [3] *80
    archws = [3] *80
    assert len(archas) == 80
    assert len(archws) == 80
    return EfficientNet(qm.QuantActivConv2d, version, archws, archas, num_classes=100, **kwargs)

def quanteffnet_w2a2(arch_cfg_path, **kwargs):
    version = "b0"
    archas = [2] *80
    archws = [2] *80
    assert len(archas) == 80
    assert len(archws) == 80
    return EfficientNet(qm.QuantActivConv2d, version, archws, archas, num_classes=100, **kwargs)


def quanteffnet_w8a8_chan(arch_cfg_path, **kwargs):
    version = "b0"
    archas = [8] *80
    archws = [8] *80
    assert len(archas) == 80
    assert len(archws) == 80
    return EfficientNet(qm.QuantMixActivChanConv2d, version, archws, archas, num_classes=100, **kwargs)

def quanteffnet_cfg(arch_cfg_path, **kwargs):
    wbits, abits = [1, 2, 3, 4], [2, 3, 4]
    version = "b0"
    name_nbits = {'alpha_activ': len(abits), 'alpha_weight': len(wbits)}
    best_arch, worst_arch = _load_arch(arch_cfg_path, name_nbits)
    archas = [abits[a] for a in best_arch['alpha_activ']]
    archws = [wbits[w] for w in best_arch['alpha_weight']]
    assert len(archas) == 80
    assert len(archws) == 80
    return EfficientNet(qm.QuantActivConv2d, version, archws, archas, num_classes=100, **kwargs)


def quanteffnet_cfg_2468(arch_cfg_path, **kwargs):
    wbits, abits = [2, 4, 6, 8],  [2, 4, 6, 8]
    version = "b0"
    name_nbits = {'alpha_activ': len(abits), 'alpha_weight': len(wbits)}
    best_arch, worst_arch = _load_arch(arch_cfg_path, name_nbits)
    archas = [abits[a] for a in best_arch['alpha_activ']]
    archws = [wbits[w] for w in best_arch['alpha_weight']]
    assert len(archas) == 80
    assert len(archws) == 80
    return EfficientNet(qm.QuantActivConv2d, version, archws, archas, num_classes=100, **kwargs)


def quanteffnet_cfg_2468_b3(arch_cfg_path, **kwargs):
    wbits, abits = [2, 4, 6, 8],  [2, 4, 6, 8]
    version = "b3"
    name_nbits = {'alpha_activ': len(abits), 'alpha_weight': len(wbits)}
    best_arch, worst_arch = _load_arch(arch_cfg_path, name_nbits)
    archas = [abits[a] for a in best_arch['alpha_activ']]
    archws = [wbits[w] for w in best_arch['alpha_weight']]
    # assert len(archas) == 80
    # assert len(archws) == 80
    return EfficientNet(qm.QuantActivConv2d, version, archws, archas, num_classes=100, **kwargs)

 
def quanteffnet_w8a8_b3(arch_cfg_path, **kwargs):
    version = "b3"
    archas = [8] *129
    archws = [8] *129
    assert len(archas) == 129
    assert len(archws) == 129
    return EfficientNet(qm.QuantActivConv2d, version, archws, archas, num_classes=100, **kwargs)

 
def quanteffnet_w32a32_chan(arch_cfg_path, **kwargs):
    version = "b0"
    archas = [32] *80
    archws = [32] *80
    assert len(archas) == 80
    assert len(archws) == 80
    return EfficientNet(qm.QuantMixActivChanConv2d, version, archws, archas, num_classes=100, **kwargs)

def quanteffnet_w2a2_chan(arch_cfg_path, **kwargs):
    version = "b0"
    archas = [2] *80
    archws = [2] *80
    assert len(archas) == 80
    assert len(archws) == 80
    return EfficientNet(qm.QuantMixActivChanConv2d, version, archws, archas, num_classes=100, **kwargs)

def quanteffnet_w4a4_chan(arch_cfg_path, **kwargs):
    version = "b0"
    archas = [4] *80
    archws = [4] *80
    assert len(archas) == 80
    assert len(archws) == 80
    return EfficientNet(qm.QuantMixActivChanConv2d, version, archws, archas, num_classes=100, **kwargs)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = quanteffnet_w8a8("a")
model

archas: [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]
archws: [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]


EfficientNet(
  (pool): AdaptiveAvgPool2d(output_size=1)
  (features): Sequential(
    (0): BasicCNNBlock(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (silu): SiLU()
    )
    (1): InvertedResidualBlock(
      (conv): Sequential(
        (0): CNNBlock(
          (cnn): QuantActivConv2d(
            (activ): HWGQ()
            (conv): QuantConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          )
          (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (silu): SiLU()
        )
        (1): SqueezeExcitation(
          (se): Sequential(
            (0): AdaptiveAvgPool2d(output_size=1)
            (1): QuantActivConv2d(
              (activ): HWGQ()
              (conv): QuantConv2d(32, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
            )
     

In [34]:
import torch
import time

class LayerTimer:
    def __init__(self):
        self.start_times = {}
        self.durations = {}

    def start(self, layer_name):
        torch.cuda.synchronize()
        self.start_times[layer_name] = time.time()

    def stop(self, layer_name):
        torch.cuda.synchronize()
        duration = time.time() - self.start_times[layer_name]
        if layer_name in self.durations:
            self.durations[layer_name].append(duration)
        else:
            self.durations[layer_name] = [duration]

def add_timing_hooks(layer, layer_timer, layer_name=""):
    if isinstance(layer, (InvertedResidualBlock, BasicCNNBlock, CNNBlock)):
        # 対象のレイヤータイプにフックを追加
        layer.register_forward_pre_hook(lambda layer, input: layer_timer.start(layer_name))
        layer.register_forward_hook(lambda layer, input, output: layer_timer.stop(layer_name))
    elif hasattr(layer, 'children') and len(list(layer.children())) > 0:
        # 子モジュールがある場合、それぞれに対して再帰的にフックを追加
        for name, child in layer.named_children():
            add_timing_hooks(child, layer_timer, f"{layer_name}.{name}" if layer_name else name)


model = quanteffnet_w8a8("a").to('cuda:1')  # GPUを使用する場合
model.eval()

# レイヤータイマーの初期化
layer_timer = LayerTimer()
# フックの追加
add_timing_hooks(model, layer_timer)

# ダミーの入力データ
input_data = torch.randn(30, 3, 224, 224).to('cuda:1')  # GPUを使用する場合
# 推論の実行
model(input_data)

# 各InvertedResidualBlockの推論時間を表示
for layer_name, durations in layer_timer.durations.items():
    print(f"{layer_name}: {sum(durations) / len(durations)} seconds")

archas: [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]
archws: [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]
features.0: 0.0003879070281982422 seconds
features.1: 0.17446517944335938 seconds
features.2: 0.6893327236175537 seconds
features.3: 0.5637657642364502 seconds
features.4: 0.31708192825317383 seconds
features.5: 0.12467694282531738 seconds
features.6: 0.13201284408569336 seconds
features.7: 0.022197723388671875 seconds
features.8: 0.038918495178222656 seconds
features.9: 0.027732372283935547 seconds
features.10: 0.15244102478027344 seconds
features.11: 0.05519533157348633 seconds
features.12: 0.06

In [51]:
for name, layer in model.named_modules():
    print(name)


pool
features
features.0
features.0.conv
features.0.bn
features.0.silu
features.1
features.1.conv
features.1.conv.0
features.1.conv.0.cnn
features.1.conv.0.cnn.activ
features.1.conv.0.cnn.conv
features.1.conv.0.bn
features.1.conv.0.silu
features.1.conv.1
features.1.conv.1.se
features.1.conv.1.se.0
features.1.conv.1.se.1
features.1.conv.1.se.1.activ
features.1.conv.1.se.1.conv
features.1.conv.1.se.2
features.1.conv.1.se.3
features.1.conv.1.se.3.activ
features.1.conv.1.se.3.conv
features.1.conv.1.se.4
features.1.conv.2
features.1.conv.2.activ
features.1.conv.2.conv
features.1.conv.3
features.2
features.2.expand_conv
features.2.expand_conv.cnn
features.2.expand_conv.cnn.activ
features.2.expand_conv.cnn.conv
features.2.expand_conv.bn
features.2.expand_conv.silu
features.2.conv
features.2.conv.0
features.2.conv.0.cnn
features.2.conv.0.cnn.activ
features.2.conv.0.cnn.conv
features.2.conv.0.bn
features.2.conv.0.silu
features.2.conv.1
features.2.conv.1.se
features.2.conv.1.se.0
features.2.con

In [8]:
# print(model.features)

# for module in model.features[1].modules():
    # print(module)
# MixActivConv2dを探すみたいな

print(model.features)
# for module in model.modules():
#     print(module)


# for name, layer in model.named_children():
#     print(name, layer)

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Hswish()
  )
  (1): MobileBottleneck(
    (conv): Sequential(
      (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
      (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): SEModule(
        (avg_pool): AdaptiveAvgPool2d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=16, out_features=4, bias=False)
          (1): ReLU(inplace=True)
          (2): Linear(in_features=4, out_features=16, bias=False)
          (3): Hsigmoid()
        )
      )
      (6): ReLU(inplace=True

In [9]:
def print_model(module, name="model", depth=0):
    if len(list(module.named_children())) == 0:
        print(f"{' ' * depth} {name}: {module}")
    else:
        print(f"{' ' * depth} {name}: {type(module)}")

    for child_name, child_module in module.named_children():
        if isinstance(module, torch.nn.Sequential):
            child_name = f"{name}[{child_name}]"
        else:
            child_name = f"{name}.{child_name}"
        print_model(child_module, child_name, depth + 1)


print_model(model)

 model: <class '__main__.MobileNetV3'>
  model.features: <class 'torch.nn.modules.container.Sequential'>
   model.features[0]: <class 'torch.nn.modules.container.Sequential'>
    model.features[0][0]: Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    model.features[0][1]: BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    model.features[0][2]: Hswish()
   model.features[1]: <class '__main__.MobileBottleneck'>
    model.features[1].conv: <class 'torch.nn.modules.container.Sequential'>
     model.features[1].conv[0]: Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
     model.features[1].conv[1]: BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     model.features[1].conv[2]: ReLU(inplace=True)
     model.features[1].conv[3]: Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
     model.features[1].conv[4]: BatchNorm2d(16, eps=1e-05, momentum=0

In [62]:
import torch
from torch import nn, optim
from torchvision import datasets, models, transforms
from torchvision.models.feature_extraction import create_feature_extractor


# モデル
model = models.resnet18()
# model = mixeffnet_b3_w2468a2468_100()
# create_feature_extractorを使用する場合


feature_extractor = create_feature_extractor(model, ["layer1"])
x = torch.randn(1, 3, 224, 224)
features = feature_extractor(x)

print(features["layer1"].shape)



torch.Size([1, 64, 56, 56])


In [111]:
def count_conv2d_layers(model):
    count = 0
    for module in model.modules():
        if isinstance(module, nn.Conv2d):
            count += 1
        elif isinstance(module, nn.Sequential):
            # Sequentialブロック内でさらにConv2dを探す
            for sub_module in module:
                if isinstance(sub_module, nn.Conv2d):
                    count += 1
    return count

num_conv2d = count_conv2d_layers(model.features[1])
print(f"Total number of Conv2d layers: {num_conv2d}")

Total number of Conv2d layers: 4


In [103]:
for name, module in model.named_modules():
    if isinstance(module, InvertedResidualBlock):
        cnn_count = sum(1 for _ in module.modules() if isinstance(_, nn.Conv2d))
        print(f"Block: {name}, Number of CNNs: {cnn_count}")

Block: features.1, Number of CNNs: 4
Block: features.2, Number of CNNs: 5
Block: features.3, Number of CNNs: 5
Block: features.4, Number of CNNs: 5
Block: features.5, Number of CNNs: 5
Block: features.6, Number of CNNs: 5
Block: features.7, Number of CNNs: 5
Block: features.8, Number of CNNs: 5
Block: features.9, Number of CNNs: 5
Block: features.10, Number of CNNs: 5
Block: features.11, Number of CNNs: 5
Block: features.12, Number of CNNs: 5
Block: features.13, Number of CNNs: 5
Block: features.14, Number of CNNs: 5
Block: features.15, Number of CNNs: 5
Block: features.16, Number of CNNs: 5


tensor([[-0.1104, -0.0256, -0.1149,  0.1797,  0.2848, -0.2497, -0.0322,  0.1326,
          0.0137,  0.1957]], grad_fn=<ToCopyBackward0>)


In [138]:
def test_inverted_residual_blocks(model, input_size):
    device = next(model.parameters()).device
    mock_input = torch.randn(1, 3, input_size, input_size).to(device)

    for name, module in model.named_modules():
        if isinstance(module, InvertedResidualBlock):
            cnn_count = sum(1 for _ in module.modules() if isinstance(_, nn.Conv2d))
            print(f"Block: {name}, Number of CNNs: {cnn_count}")

            try:
                # モック入力をブロックに適用して入力サイズをテスト
                output = module(mock_input)
                print(f"Block: {name}, Input size: {mock_input.shape}, Output size: {output.shape}")
                mock_input = output  # 次のブロックのために出力を入力として更新
            except RuntimeError as e:
                print(f"Error processing block {name}: {e}")
                break

# モデルのインスタンス化
model = mixeffnet_b0_w1234a234()  # または他のバージョン

# ブロックのテスト
test_inverted_residual_blocks(model, input_size=224)


abits: [2, 3, 4]
wbits: [1, 2, 3, 4]
Block: features.1, Number of CNNs: 4
Error processing block features.1: Given groups=32, weight of size [32, 1, 3, 3], expected input[1, 3, 224, 224] to have 32 channels, but got 3 channels instead


In [14]:
def test_conv2d_input_sizes(model, input_size):
    device = next(model.parameters()).device
    mock_input = torch.randn(1, 3, input_size, input_size).to(device)

    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            try:
                # モック入力をモジュールに適用
                output = module(mock_input)
                print(f"Module: {name}, Input size: {mock_input.shape}, Output size: {output.shape}")
                mock_input = output  # 次のモジュールのために出力を入力として更新
            except RuntimeError as e:
                print(f"Error processing module {name}: {e}")
                break

# モデルのインスタンス化
# model = mixeffnet_b0_w1234a234()  # または他のバージョン

# モジュールのテスト
test_conv2d_input_sizes(model, input_size=224)


Module: features.0.conv, Input size: torch.Size([1, 3, 224, 224]), Output size: torch.Size([1, 38, 112, 112])
Module: features.1.conv.0.cnn.mix_weight.conv, Input size: torch.Size([1, 38, 112, 112]), Output size: torch.Size([1, 38, 112, 112])
Module: features.1.conv.1.se.1.mix_weight.conv, Input size: torch.Size([1, 38, 112, 112]), Output size: torch.Size([1, 9, 112, 112])
Module: features.1.conv.1.se.3.mix_weight.conv, Input size: torch.Size([1, 9, 112, 112]), Output size: torch.Size([1, 38, 112, 112])
Module: features.1.conv.2.mix_weight.conv, Input size: torch.Size([1, 38, 112, 112]), Output size: torch.Size([1, 20, 112, 112])
Module: features.2.conv.0.cnn.mix_weight.conv, Input size: torch.Size([1, 20, 112, 112]), Output size: torch.Size([1, 20, 112, 112])
Module: features.2.conv.1.se.1.mix_weight.conv, Input size: torch.Size([1, 20, 112, 112]), Output size: torch.Size([1, 5, 112, 112])
Module: features.2.conv.1.se.3.mix_weight.conv, Input size: torch.Size([1, 5, 112, 112]), Output

In [4]:
model = mixeffnet_b3_w2468a2468_100()
# model = mobilenetv3()
# model = torchvision.models.mobilenet_v3_large()

def get_natural_bottlenecks_pytorch(model, input_size, compressive_only=True):
    natural_bottlenecks = []
    best_compression = 1.0
    cnn_count = 0  # CNNレイヤーのカウント

    device = next(model.parameters()).device
    
    mock_input = torch.randn(1, 3, input_size, input_size).to(device)

    previous_size = torch.prod(torch.tensor(mock_input.shape[1:])).item()

    for name, layer in model.named_modules():
        try:
            if isinstance(layer, nn.Conv2d):
                cnn_count += 1  # CNNレイヤーをカウント

            if isinstance(layer, (nn.Conv2d, nn.Linear)):
                if isinstance(layer, nn.Linear) and len(mock_input.shape) > 2:
                    mock_input = torch.flatten(mock_input, 1)

                output = layer(mock_input)
                input_size_layer = previous_size
                previous_size = torch.prod(torch.tensor(output.shape[1:])).item()

                if input_size_layer < input_size * input_size * 3:
                    compression = float(input_size_layer) / (input_size * input_size * 3)
                    if not compressive_only or compression < best_compression:
                        natural_bottlenecks.append({
                            'layer_name': name,
                            'compression': compression,
                            'cnn_layer_number': cnn_count  # ここでCNNレイヤーの番号を記録
                        })
                        best_compression = compression

                mock_input = output.detach()
        except RuntimeError as e:
            print(f"Error processing layer {name}: {e}")
            break

    return natural_bottlenecks

# ボトルネックを取得
bottlenecks = get_natural_bottlenecks_pytorch(model, input_size=224)
for bottleneck in bottlenecks:
    print(bottleneck)


abits: [2, 4, 6, 8]
wbits: [2, 4, 6, 8]
Error processing layer classifier.1: mat1 and mat2 shapes cannot be multiplied (1x75901 and 1549x100)
{'layer_name': 'features.1.conv.1.se.3.mix_weight.conv', 'compression': 0.75, 'cnn_layer_number': 4}
{'layer_name': 'features.2.conv.1.se.3.mix_weight.conv', 'compression': 0.4166666666666667, 'cnn_layer_number': 8}
{'layer_name': 'features.3.conv.1.se.3.mix_weight.conv', 'compression': 0.10416666666666667, 'cnn_layer_number': 13}
{'layer_name': 'features.6.conv.1.se.3.mix_weight.conv', 'compression': 0.041666666666666664, 'cnn_layer_number': 28}
{'layer_name': 'features.9.conv.1.se.3.mix_weight.conv', 'compression': 0.015625, 'cnn_layer_number': 43}
{'layer_name': 'features.19.conv.1.se.3.mix_weight.conv', 'compression': 0.011067708333333334, 'cnn_layer_number': 93}


In [22]:
import torchvision
# model = mixeffnet_b3_w2468a2468_100()
# model = mobilenetv3()
model = torchvision.models.mobilenet_v3_large()
for name, layer in model.named_modules():
    print(name, layer)

 MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), b

In [99]:
model = mixeffnet_b0_w2468a2468_100()

# bitの出力を直接入力できるようにする、その後DSCを実行する
def get_natural_bottlenecks_pytorch(model, input_size, compressive_only=True):
    natural_bottlenecks = []
    best_compression = 1.0
    cnn_count = 0  # CNNレイヤーのカウント

    device = next(model.parameters()).device
    
    mock_input = torch.randn(1, 3, input_size, input_size).to(device)

    previous_size = torch.prod(torch.tensor(mock_input.shape[1:])).item()
    for name, layer in model.named_modules():
        try:
            if isinstance(layer, nn.Conv2d):
                cnn_count += 1  # CNNレイヤーをカウント

            if isinstance(layer, (nn.Conv2d, nn.Linear)):
                if isinstance(layer, nn.Linear) and len(mock_input.shape) > 2:
                    mock_input = torch.flatten(mock_input, 1)

                output = layer(mock_input)
                input_size_layer = previous_size
                previous_size = torch.prod(torch.tensor(output.shape[1:])).item()

                if input_size_layer < input_size * input_size * 3:
                    compression = float(input_size_layer) / (input_size * input_size * 3)
                    if not compressive_only or compression < best_compression:
                        natural_bottlenecks.append({
                            'layer_name': name,
                            'compression': compression,
                            'cnn_layer_number': cnn_count  # ここでCNNレイヤーの番号を記録
                        })
                        best_compression = compression

                mock_input = output.detach()
        except RuntimeError as e:
            print(f"Error processing layer {name}: {e}")
            break

    return natural_bottlenecks



# ボトルネックを取得
bottlenecks = get_natural_bottlenecks_pytorch(model, input_size=224)
for bottleneck in bottlenecks:
    print(bottleneck)


abits: [2, 4, 6, 8]
wbits: [2, 4, 6, 8]
Error processing layer classifier.1: mat1 and mat2 shapes cannot be multiplied (1x62720 and 1280x100)
{'layer_name': 'features.1.conv.1.se.3.mix_weight.conv', 'compression': 0.6666666666666666, 'cnn_layer_number': 4}
{'layer_name': 'features.2.conv.1.se.3.mix_weight.conv', 'compression': 0.08333333333333333, 'cnn_layer_number': 9}
{'layer_name': 'features.4.conv.1.se.3.mix_weight.conv', 'compression': 0.03125, 'cnn_layer_number': 19}
{'layer_name': 'features.6.conv.1.se.3.mix_weight.conv', 'compression': 0.013020833333333334, 'cnn_layer_number': 29}
{'layer_name': 'features.12.conv.1.se.3.mix_weight.conv', 'compression': 0.009114583333333334, 'cnn_layer_number': 59}


In [5]:
model = mixeffnet_b0_w2468a2468_100()


def get_natural_bottlenecks_pytorch(model, input_size, compressive_only=True):
    # 各層のinputサイズを計算して、圧縮率が最も高い層を探す
    natural_bottlenecks = []
    best_compression = 1.0
    cnn_count = 0  # CNNレイヤーのカウント
    input_bit = 8 # 入力のbit数
    min_bit = 2  # 探索する最小のbit数
    bit_compression = min_bit / input_bit

    device = next(model.parameters()).device
    
    mock_input = torch.randn(1, 3, input_size, input_size).to(device)
    previous_size = torch.prod(torch.tensor(mock_input.shape[1:])).item()

    for i, module in enumerate(model.features):
        # print(i, module)
        block_number = i
        if isinstance(module, BasicCNNBlock):
            print(f"Encountered BasicBlock at features.{i}")
            output = module(mock_input)
            mock_input = output.detach()
            continue
        
        input_size_layer = torch.prod(torch.tensor(mock_input.shape[1:])).item()
        if input_size_layer * min_bit < input_size * input_size * 3 * input_bit:
            compression = float(input_size_layer) / (input_size * input_size * 3)
            compression *= bit_compression
            if not compressive_only or compression < best_compression:
                natural_bottlenecks.append({
                    'layer_name': "block_{}".format(block_number),
                    'compression': compression,
                    'cnn_layer_number': cnn_count  # ここでCNNレイヤーの番号を記録
                })
                best_compression = compression
        output = module(mock_input)
        mock_input = output.detach()
        
        cnn_count += count_conv2d_layers(module)


    return natural_bottlenecks

def count_conv2d_layers(model):
    count = 0
    for module in model.modules():
        if isinstance(module, nn.Conv2d):
            count += 1
        elif isinstance(module, nn.Sequential):
            # Sequentialブロック内でさらにConv2dを探す
            for sub_module in module:
                if isinstance(sub_module, nn.Conv2d):
                    count += 1
    return count

# ボトルネックを取得
bottlenecks = get_natural_bottlenecks_pytorch(model, input_size=224)
for bottleneck in bottlenecks:
    print(bottleneck)
    
# cnn_layer_numberのみをリストに収集
cnn_layer_numbers = [bottleneck['cnn_layer_number'] for bottleneck in bottlenecks]

print(cnn_layer_numbers)
print(len(cnn_layer_numbers))

abits: [2, 4, 6, 8]
wbits: [2, 4, 6, 8]
Encountered BasicBlock at features.0
{'layer_name': 'block_1', 'compression': 0.6666666666666666, 'cnn_layer_number': 0}
{'layer_name': 'block_2', 'compression': 0.3333333333333333, 'cnn_layer_number': 4}
{'layer_name': 'block_3', 'compression': 0.125, 'cnn_layer_number': 9}
{'layer_name': 'block_5', 'compression': 0.052083333333333336, 'cnn_layer_number': 19}
{'layer_name': 'block_7', 'compression': 0.026041666666666668, 'cnn_layer_number': 29}
{'layer_name': 'block_13', 'compression': 0.015625, 'cnn_layer_number': 59}
[0, 4, 9, 19, 29, 59]
6


In [8]:
summary(model=mixeffnet_b3_w2468a2468_100(), input_size=(1, 3, 224, 224))   

abits: [2, 4, 6, 8]
wbits: [2, 4, 6, 8]


In [7]:
model

EfficientNet(
  (pool): AdaptiveAvgPool2d(output_size=1)
  (features): Sequential(
    (0): BasicCNNBlock(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (silu): SiLU()
    )
    (1): InvertedResidualBlock(
      (conv): Sequential(
        (0): CNNBlock(
          (cnn): MixActivConv2d(
            (mix_activ): MixQuantActiv(
              (mix_activ): ModuleList(
                (0): HWGQ()
                (1): HWGQ()
                (2): HWGQ()
                (3): HWGQ()
              )
            )
            (mix_weight): SharedMixQuantConv2d(
              (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            )
          )
          (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (silu): SiLU()
        )
        (1): SqueezeExcitation(


In [14]:
import torch
import torchvision
from torchinfo import summary
model = torchvision.models.efficientnet_b3()

In [16]:
summary(model=model, input_size=(1, 3, 224, 224))   

Layer (type:depth-idx)                                  Output Shape              Param #
EfficientNet                                            [1, 1000]                 --
├─Sequential: 1-1                                       [1, 1536, 7, 7]           --
│    └─Conv2dNormActivation: 2-1                        [1, 40, 112, 112]         --
│    │    └─Conv2d: 3-1                                 [1, 40, 112, 112]         1,080
│    │    └─BatchNorm2d: 3-2                            [1, 40, 112, 112]         80
│    │    └─SiLU: 3-3                                   [1, 40, 112, 112]         --
│    └─Sequential: 2-2                                  [1, 24, 112, 112]         --
│    │    └─MBConv: 3-4                                 [1, 24, 112, 112]         2,298
│    │    └─MBConv: 3-5                                 [1, 24, 112, 112]         1,206
│    └─Sequential: 2-3                                  [1, 32, 56, 56]           --
│    │    └─MBConv: 3-6                            

In [128]:
model = models.efficientnet_b0()
model.features

In [131]:
# resnet 分割点

import torch
import torch.nn as nn
import torchvision.models as models

def get_natural_bottlenecks_pytorch(model, input_size, compressive_only=True):
    natural_bottlenecks = []
    best_compression = 1.0
    device = next(model.parameters()).device
    mock_input = torch.randn(1, 3, input_size, input_size).to(device)

    for name, layer in model.named_children():
        # 処理する前にリシェイプを行う必要がある層をチェック
        if isinstance(layer, nn.AdaptiveAvgPool2d):
            # 平均プーリング層の出力をリシェイプ
            mock_input = layer(mock_input)
            mock_input = mock_input.view(mock_input.size(0), -1)
        else:
            try:
                output = layer(mock_input)
                input_size_layer = torch.prod(torch.tensor(mock_input.shape[1:])).item()
                mock_input = output
                output_size = torch.prod(torch.tensor(output.shape[1:])).item()

                compression = float(output_size) / input_size_layer
                if not compressive_only or compression < best_compression:
                    natural_bottlenecks.append({
                        'layer_name': name,
                        'compression': compression
                    })
                    best_compression = compression

            except RuntimeError as e:
                print(f"Error processing layer {name}: {e}")
                break

    return natural_bottlenecks

# ResNetモデルのインスタンス化
model = models.vgg11()

# ボトルネックの特定
bottlenecks = get_natural_bottlenecks_pytorch(model, input_size=224)
for bottleneck in bottlenecks:
    print(bottleneck)


{'layer_name': 'features', 'compression': 0.16666666666666666}
{'layer_name': 'classifier', 'compression': 0.03985969387755102}


In [8]:
model

EfficientNet(
  (pool): AdaptiveAvgPool2d(output_size=1)
  (features): Sequential(
    (0): BasicCNNBlock(
      (conv): Conv2d(3, 38, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(38, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (silu): SiLU()
    )
    (1): InvertedResidualBlock(
      (conv): Sequential(
        (0): CNNBlock(
          (cnn): MixActivConv2d(
            (mix_activ): MixQuantActiv(
              (mix_activ): ModuleList(
                (0): HWGQ()
                (1): HWGQ()
                (2): HWGQ()
                (3): HWGQ()
              )
            )
            (mix_weight): SharedMixQuantConv2d(
              (conv): Conv2d(38, 38, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=38, bias=False)
            )
          )
          (bn): BatchNorm2d(38, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (silu): SiLU()
        )
        (1): SqueezeExcitation(


In [9]:
nn.Sequential(*list(model.avgpool.children())[:-1])

AttributeError: 'EfficientNet' object has no attribute 'avgpool'

In [7]:
import torch
import torch.nn as nn
import time


# GPUの存在を確認
if torch.cuda.device_count() < 2:
    raise RuntimeError("このコードは少なくとも2つのGPUが必要です。")

def measure_inference_time(model, input_data, device):
    start_time = time.time()
    model(input_data.to(device))
    end_time = time.time()
    return end_time - start_time


# 全体モデルの定義
class FullModel(nn.Module):
    def __init__(self):
        super(FullModel, self).__init__()
        self.layers = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(32 * 56 * 56, 10)
        )

    def forward(self, x):
        return self.layers(x)


def run_experiment(edge_model, server_model, input_data):
    # エッジ側での推論時間を計測
    edge_inference_time = measure_inference_time(edge_model, input_data, 'cuda:0')
    edge_output = edge_model(input_data)

    # データ転送時間を計測
    start_transfer_time = time.time()
    edge_output = edge_output.to('cuda:1')
    end_transfer_time = time.time()
    data_transfer_time = end_transfer_time - start_transfer_time

    # サーバー側での推論時間を計測
    server_inference_time = measure_inference_time(server_model, edge_output, 'cuda:1')

    # 合計時間を計算
    total_time = edge_inference_time + data_transfer_time + server_inference_time
    return total_time, edge_inference_time, data_transfer_time, server_inference_time

def main():
    # モデルのインスタンス化と分割
    full_model = FullModel()
    split_point = 4
    edge_model = nn.Sequential(*list(full_model.layers[:split_point])).to('cuda:0')
    server_model = nn.Sequential(*list(full_model.layers[split_point:])).to('cuda:1')

    # ダミーの入力データ
    input_data = torch.randn(1, 3, 224, 224).to('cuda:0')

    # 実験の実行
    total_time, edge_inference_time, data_transfer_time, server_inference_time = run_experiment(edge_model, server_model, input_data)

    # 結果の表示
    print(f'Total Inference Time: {total_time} seconds')
    print(f'Edge Inference Time: {edge_inference_time} seconds')
    print(f'Data Transfer Time: {data_transfer_time} seconds')
    print(f'Server Inference Time: {server_inference_time} seconds')

if __name__ == '__main__':
    main()




Total Inference Time: 10.895565032958984 seconds
Edge Inference Time: 10.031911134719849 seconds
Data Transfer Time: 0.00036787986755371094 seconds
Server Inference Time: 0.863286018371582 seconds


In [165]:
# for name, module in model.named_children():
#     print(name)
    
# # モデルのすべてのモジュールを列挙
# for name, module in model.named_modules():
#     print(name)

# for child in model.children():
#     print(child)

children = list(model.children())  #どんなモデルにも対応させる場合

# split_point = bottlenecks[0]['layer_index'] if bottlenecks else len(list(model.children()))

# モデルの分割
# children = list(model.children())
# edge_model = nn.Sequential(*children[:split_point]).to('cuda:0')
# server_model = nn.Sequential(*children[split_point:]).to('cuda:1')


In [31]:
model = mixeffnet_b0_w2468a2468_100()
model.features

abits: [2, 4, 6, 8]
wbits: [2, 4, 6, 8]


Sequential(
  (0): BasicCNNBlock(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (silu): SiLU()
  )
  (1): InvertedResidualBlock(
    (conv): Sequential(
      (0): CNNBlock(
        (cnn): MixActivConv2d(
          (mix_activ): MixQuantActiv(
            (mix_activ): ModuleList(
              (0): HWGQ()
              (1): HWGQ()
              (2): HWGQ()
              (3): HWGQ()
            )
          )
          (mix_weight): SharedMixQuantConv2d(
            (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          )
        )
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (silu): SiLU()
      )
      (1): SqueezeExcitation(
        (se): Sequential(
          (0): AdaptiveAvgPool2d(output_size=1)
          (1): MixActivConv2d(
            (mix

In [6]:
def split_efficientnet_model(model, split_layer, edge_device, server_device):
    edge_model = nn.Sequential(*list(model.features[:split_layer])).to(edge_device)
    server_model_layers = list(model.features[split_layer:]) + [model.pool] +[nn.Flatten()]
    server_model_layers += list(model.classifier)
    server_model = nn.Sequential(*server_model_layers).to(server_device)
    return edge_model, server_model

def main():
    # モデルのインスタンス化
    model = mixeffnet_b0_w2468a2468_100()

    # ボトルネック層を取得
    bottlenecks = get_natural_bottlenecks_pytorch(model, input_size=224)
    #split_layer = bottlenecks[0]['cnn_layer_number']  # 最初のボトルネック層
    split_layer = 4
    # モデルを分割
    edge_device = torch.device("cuda:0")
    server_device = torch.device("cuda:1")
    edge_model, server_model = split_efficientnet_model(model, split_layer, edge_device, server_device)
    input_data = torch.randn(1, 3, 224, 224).to(edge_device)
    # 以降、エッジとサーバーでの推論処理を実装
    # エッジ側での推論
    start_time = time.time()
    edge_output = edge_model(input_data)
    print(f"Edge output shape: {edge_output.shape}")  # 出力形状の印刷
    edge_time = time.time() - start_time
    
    # エッジからサーバーへのデータ転送
    start_time = time.time()
    edge_output = edge_output.to(server_device)
    transfer_time = time.time() - start_time
    
    # サーバー側での推論
    start_time = time.time()
    server_output = server_model(edge_output)
    server_time = time.time() - start_time
    
    total_time = edge_time + transfer_time + server_time
    print(f"Total Inference Time: {total_time} seconds")
    print(f"Edge Inference Time: {edge_time} seconds")
    print(f"Data Transfer Time: {transfer_time} seconds")
    print(f"Server Inference Time: {server_time} seconds")
    
    
if __name__ == '__main__':
    main()


abits: [2, 4, 6, 8]
wbits: [2, 4, 6, 8]
Encountered BasicBlock at features.0


NameError: name 'time' is not defined

In [36]:
def get_inference_time(model, batch_size, device, repetitions=100, input_shape=None, intermediate=None,):
    
    """
    Get the inference time of a model for a given input shape and batch size.
    """
    # モデルを評価モードに設定し、適切なデバイスに移動
    model = model.eval().to(device)

    if input_shape is None:
        input_shape = (batch_size, 3, config['image_size'], config['image_size'])
    else:
        input_shape = (batch_size,) + input_shape[1:]
    
    if intermediate is None:
        input_data = torch.ones(input_shape, device=device)  # the original code uses dtype=torch.float16, which would be 2 bytes
    else:
        input_data = intermediate
    # input_data = torch.randn(1, 3, 224, 224).to(device)
    # ウォームアップフェーズ
    with torch.no_grad():
        for _ in range(repetitions):
            model(input_data)
            
    # CUDAカーネルの同期化        
    torch.cuda.synchronize(device)  # Make sure all CUDA operations have finished

    # 推論時間の計測開始
    start = time.perf_counter()
    
    with torch.no_grad():
        for _ in range(repetitions):
            model(input_data)
            
    # 再度、CUDAカーネルの同期化
    torch.cuda.synchronize(device)
    end = time.perf_counter()
    
    # 平均推論時間をミリ秒単位で計算
    inference_time = (end - start) / repetitions * 100  # in milliseconds
    return inference_time
# model = mixeffnet_b0_w2468a2468_100()
model = quanteffnet_w8a8("aaa")
# model = models.efficientnet_b0(pretrained=True)
# model = quanteffnet_cfg_2468("/DynamicSplit/arch_model_best.pth.tar")
print(get_inference_time(model, 30, 'cuda:1'))

archas: [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]
archws: [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]
23.305328699061647


In [35]:
config = {
    'processors': {
        'weak': 'cuda:1',
        'strong': 'cuda:0',
    },
    'model_name': 'efficientnet-b0',
    'image_size': 224,
    'batch_sizes': list(range(1, 2)),
    'max_bandwidth': 128 * 10 ** 6,  # Bytes per second
    'min_bandwidth': 1 * 10 ** 6,  # Bytes per second
    'bandwidth_step': 1 * 10 ** 6,  # Bytes per second
}
