1、用于测试模型的Params\GFLOPs\FPS

In [1]:
import torch.nn as nn
import numpy as np
import torch
import copy
from thop import profile
from thop import clever_format
import torch.nn.functional as F
import math
from torchstat import stat

def show_macs_params(model, img_size=(256, 256), dummy_input=None):
    """Print the operation count and parameter count thop reports for ``model``.

    The model is moved to CUDA when available (CPU otherwise) and put into
    eval mode, so the object passed in is modified in place.

    Args:
        model: ``nn.Module`` to profile.
        img_size: ``(height, width)`` of the random ``1 x 3 x H x W`` input
            that is generated when ``dummy_input`` is None.
        dummy_input: Optional tensor to profile with instead of a random one.

    Note:
        The number printed after ``FLOPs=`` is the first value returned by
        ``thop.profile`` (bound to ``macs`` here) divided by 1e9, hence the
        "G" suffix; the parameter count is divided by 1e6, hence "M".
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    if dummy_input is None:
        height, width = img_size[0], img_size[1]
        dummy_input = torch.randn(1, 3, height, width, dtype=torch.float)
    dummy_input = dummy_input.to(device)

    # Raw counts first, then the same numbers scaled to giga / mega units.
    macs, params = profile(model, inputs=(dummy_input,), verbose=False)
    print(f"{model._get_name()}\t\t{macs=}\t{params=}")
    giga_ops = str(macs / 1e9) + "G"
    mega_params = str(params / 1e6) + "M"
    print("FLOPs=", giga_ops, end='\t')
    print("params=", mega_params)

    # clever_format turns both counts into short human-readable strings.
    macs, params = clever_format([macs, params], "%.3f")
    print(f"{macs=}\t{params=}")


def inference_speed(model, img_size=(256, 256), dummy_input=None,
                    repetitions=300, warmup=10):
    """Measure and print the mean forward latency, its std and the FPS of ``model``.

    The model is moved to CUDA when available (CPU otherwise) and put into
    eval mode, so the object passed in is modified in place. On CUDA the
    latency is measured with CUDA events; on CPU, where CUDA events cannot be
    used, it is measured with ``time.perf_counter``.

    Args:
        model: ``nn.Module`` to benchmark.
        img_size: ``(height, width)`` of the random ``1 x 3 x H x W`` input
            that is generated when ``dummy_input`` is None.
        dummy_input: Optional tensor to run instead of a random one.
        repetitions: Number of timed forward passes (must be positive).
        warmup: Number of untimed forward passes executed first.

    Raises:
        ValueError: If ``repetitions`` is not positive.
    """
    import time  # local import: only needed by the CPU timing fallback

    if repetitions <= 0:
        raise ValueError(f'repetitions must be positive, got {repetitions}')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    use_cuda = device.type == "cuda"
    model.to(device)
    model.eval()
    if dummy_input is None:
        dummy_input = torch.randn(1, 3, img_size[0], img_size[1], dtype=torch.float)
    dummy_input = dummy_input.to(device)

    if use_cuda:
        starter = torch.cuda.Event(enable_timing=True)
        ender = torch.cuda.Event(enable_timing=True)

    timings = np.zeros(repetitions)  # per-iteration latency in milliseconds
    with torch.no_grad():
        # Warm-up also runs without autograd so it matches the timed passes
        # and does not build graphs that are never used.
        for _ in range(warmup):
            model(dummy_input)
        if use_cuda:
            # Make sure warm-up kernels have finished before timing starts.
            torch.cuda.synchronize()

        for rep in range(repetitions):
            if use_cuda:
                starter.record()
                model(dummy_input)
                ender.record()
                # Wait for the GPU so that elapsed_time() is valid.
                torch.cuda.synchronize()
                timings[rep] = starter.elapsed_time(ender)
            else:
                tic = time.perf_counter()
                model(dummy_input)
                timings[rep] = (time.perf_counter() - tic) * 1000.0

    mean_syn = timings.mean()
    std_syn = timings.std()
    mean_fps = 1000. / mean_syn
    print(f'Mean@ {mean_syn:.3f}ms Std@ {std_syn:.3f}ms FPS@ {mean_fps:.2f}')
    # NOTE(review): the original author left an open question about what the
    # "n" in an "@n" label (e.g. "Mean@1", "Std@5") would mean; the labels
    # printed here deliberately carry no number.

In [9]:
from models import constant_init, normal_init
from models.pose_estimation.liteHandNet.common import channel_shuffle, ChannelAttension, SEBlock
from models.pose_estimation.liteHandNet.repblocks import RepConv, RepBlock
import numpy as np
import math


class MSRB(nn.Module):
    """Two-stage, two-branch residual block working on channel halves.

    In each of the two stages the input is split in half along the channel
    axis, each half goes through its own grouped 3x3 RepConv, the halves are
    concatenated again, passed through the optional channel attention and a
    LeakyReLU. The block input is added back before a final grouped 1x1
    RepConv.

    Args:
        channels: Number of input (and output) channels.
        ca_type: ``'se'`` selects SEBlock, ``'ca'`` selects ChannelAttension;
            any other value disables attention (``nn.Identity``).
    """

    def __init__(self, channels, ca_type='none'):
        super().__init__()
        self.half_channels = channels // 2
        half = self.half_channels

        # First branch: 3x3 grouped convs with padding 1.
        self.branch1 = nn.ModuleList([
            RepConv(half, half, 3, 1, 1, groups=half, activation=None)
            for _ in range(2)
        ])
        # Second branch: same, with padding 2 and a sixth positional argument
        # of 2 (presumably the dilation -- confirm against RepConv).
        self.branch2 = nn.ModuleList([
            RepConv(half, half, 3, 1, 2, 2, groups=half, activation=None)
            for _ in range(2)
        ])

        # Information exchange between the halves: channel attention module.
        if ca_type == 'se':
            attention = [SEBlock(channels, internal_neurons=channels // 16)
                         for _ in range(2)]
        elif ca_type == 'ca':
            attention = [ChannelAttension(channels) for _ in range(2)]
        else:
            attention = [nn.Identity() for _ in range(2)]
        self.ca = nn.ModuleList(attention)

        self.nonlinearity = nn.LeakyReLU()
        self.conv = RepConv(channels, channels, 1, 1, 0, groups=channels, activation=None)

    def forward(self, x):
        """Run both stages and return ``conv(stages(x) + x)``."""
        feat = x
        for first, second, attend in zip(self.branch1, self.branch2, self.ca):
            lower, upper = torch.chunk(feat, 2, dim=1)
            merged = torch.cat([first(lower), second(upper)], dim=1)
            feat = self.nonlinearity(attend(merged))
        return self.conv(feat + x)


class RepBasicUnit(nn.Module):
    """Channel-split unit: one part is passed through, the other is convolved.

    The first ``in_channels // 2`` channels are kept untouched; the remaining
    channels go through a 1x1 RepConv followed by a grouped 3x3 RepConv. Both
    parts are concatenated, mixed by a grouped 1x1 RepConv and finally passed
    through the selected channel attention.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        ca_type: ``'se'``, ``'ca'`` or ``'none'``.

    Raises:
        ValueError: If ``ca_type`` is not one of the three accepted values.
    """

    def __init__(self, in_channels, out_channels, ca_type='ca'):
        super(RepBasicUnit, self).__init__()
        self.left_part = in_channels // 2
        self.right_part_in = in_channels - self.left_part
        self.right_part_out = out_channels - self.left_part

        pointwise = RepConv(self.right_part_in, self.right_part_out, kernel=1)
        depthwise = RepConv(self.right_part_out, self.right_part_out, kernel=3,
                            padding=1, groups=self.right_part_out, activation=None)
        self.conv1 = nn.Sequential(pointwise, depthwise)
        self.conv2 = RepConv(out_channels, out_channels, 1, 1, 0,
                             groups=out_channels, activation=nn.LeakyReLU)

        if ca_type == 'none':
            attention = nn.Identity()
        elif ca_type == 'ca':
            attention = ChannelAttension(out_channels)
        elif ca_type == 'se':
            attention = SEBlock(out_channels, internal_neurons=out_channels // 16)
        else:
            raise ValueError(f'<{ca_type=}> not in se|ca|none')
        self.ca = attention

    def forward(self, x):
        """Split ``x`` on the channel axis, transform the right part, re-join."""
        untouched = x[:, :self.left_part, :, :]
        transformed = self.conv1(x[:, self.left_part:, :, :])
        joined = torch.cat((untouched, transformed), 1)
        return self.ca(self.conv2(joined))


class DWConv_ELAN(nn.Module):
    """Aggregation block built from two chained grouped-3x3 / 1x1 RepConv stacks.

    The first ``in_channel // 2`` input channels feed the first stack, whose
    output feeds the second stack. The full input and both stack outputs are
    concatenated, projected by a 1x1 convolution and channel-shuffled.

    Args:
        in_channel: Number of input channels. The fusion conv expects
            ``4 * (in_channel // 2)`` channels, so this only lines up with the
            concatenation when ``in_channel`` is even.
        out_channel: Number of output channels.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        mid_channel = in_channel // 2

        def make_stack():
            # (grouped 3x3 -> 1x1) repeated twice, all at mid_channel width.
            return nn.Sequential(
                RepConv(mid_channel, mid_channel, 3, 1, 1, groups=mid_channel, activation=None),
                RepConv(mid_channel, mid_channel, 1, 1, 0),
                RepConv(mid_channel, mid_channel, 3, 1, 1, groups=mid_channel, activation=None),
                RepConv(mid_channel, mid_channel, 1, 1, 0),
            )

        self.conv1 = make_stack()
        self.conv2 = make_stack()
        self.conv3 = nn.Conv2d(4 * mid_channel, out_channel, 1, 1, 0)
        self.c = mid_channel

    def forward(self, x):
        """Return the shuffled fusion of ``x`` and both stack outputs."""
        first = self.conv1(x[:, :self.c, :, :])
        second = self.conv2(first)
        fused = self.conv3(torch.cat([x, first, second], dim=1))
        return channel_shuffle(fused, groups=2)


class EncoderDecoder(nn.Module):
    """Hourglass-style encoder/decoder made of RepBasicUnit pairs.

    Args:
        num_stage: Number of encoder stages (and matching decoder stages).
        channel: Channel width used by every stage.
        ca_type: Attention type used by the first unit of each encoder stage
            and the second unit of each decoder stage; the other unit of each
            pair uses ``'none'``.
    """

    def __init__(self, num_stage=4, channel=128, ca_type='ca'):
        super().__init__()
        self.num_stage = num_stage
        self.encoder = nn.ModuleList([])
        self.decoder = nn.ModuleList([])

        self.maxpool = nn.MaxPool2d(2, 2)
        for _ in range(num_stage):
            encoder_stage = nn.Sequential(
                RepBasicUnit(channel, channel, ca_type=ca_type),
                RepBasicUnit(channel, channel, ca_type='none'),
            )
            self.encoder.append(encoder_stage)
            decoder_stage = nn.Sequential(
                RepBasicUnit(channel, channel, ca_type='none'),
                RepBasicUnit(channel, channel, ca_type=ca_type),
            )
            self.decoder.append(decoder_stage)

    def forward(self, x):
        """Return a tuple with every decoder output, deepest stage first."""
        deepest = self.num_stage - 1
        skips = []        # encoder outputs, shallowest first
        outputs = []      # decoder outputs, deepest first

        # Encoder: 2x max-pooling between stages, none after the last one.
        for idx, stage in enumerate(self.encoder):
            x = stage(x)
            skips.append(x)
            if idx != deepest:
                x = self.maxpool(x)

        # Decoder: walk back up, merging with the matching encoder output.
        for idx in reversed(range(self.num_stage)):
            skip = skips[idx]
            if idx == deepest:
                x = self.decoder[idx](skip)
                # Long shortcut: first encoder output pooled to the deepest size.
                h, w = skips[-1].shape[2:]
                x = x + F.adaptive_avg_pool2d(skips[0], (h, w))
            else:
                x = F.interpolate(x, size=skip.shape[2:])
                x = x + skip
                x = self.decoder[idx](x)
            outputs.append(x)
        return tuple(outputs)


class Stem(nn.Module):
    """Input stem: strided conv, two parallel downsampling branches, then fusion.

    Args:
        channel: Number of output channels of the stem.
        ca_type: Attention type forwarded to the MSRB inside the fusion stack.
    """

    def __init__(self, channel, ca_type='ca'):
        super().__init__()
        # Internal width is a quarter of the output width, but at least 32.
        mid_channel = max(channel // 4, 32)

        self.conv1 = nn.Sequential(
            RepConv(3, mid_channel, 3, 2, 1),
            RepConv(mid_channel, mid_channel, 7, 1, 3,
                    groups=mid_channel, activation=None),
        )
        # Branch 1 downsamples with a strided grouped conv between two 1x1 convs.
        self.branch1 = nn.Sequential(
            RepConv(mid_channel, mid_channel, 1, 1, 0),
            RepConv(mid_channel, mid_channel, 3, 2, 1,
                    groups=mid_channel, activation=None),
            RepConv(mid_channel, mid_channel, 1, 1, 0),
        )
        # Branch 2 downsamples with max pooling.
        self.branch2 = nn.MaxPool2d(2, 2, ceil_mode=True)

        self.conv2 = nn.Sequential(
            RepConv(2 * mid_channel, channel),
            MSRB(channel, ca_type=ca_type),
            RepConv(channel, channel),
        )

    def forward(self, x):
        """Return the fused features of both downsampling branches."""
        shared = self.conv1(x)
        conv_branch = self.branch1(shared)
        pool_branch = self.branch2(shared)
        return self.conv2(torch.cat([conv_branch, pool_branch], dim=1))


class LiteHandNet(nn.Module):
    """Stem -> EncoderDecoder -> neck -> 1x1 head network.

    Args:
        cfg: Config object exposing ``cfg.MODEL.get(...)`` for ``num_stage``,
            ``ca_type``, ``input_channel`` and ``output_channel``.
            ``cfg.DATASET.num_joints`` is read as the default for
            ``output_channel``.
        deploy: Initial value of the ``deploy`` flag.
    """

    def __init__(self, cfg, deploy=False):
        super().__init__()
        model_cfg = cfg.MODEL
        num_stage = model_cfg.get('num_stage', 4)
        ca_type = model_cfg.get('ca_type', 'ca')
        width = model_cfg.get('input_channel', 256)
        num_out = model_cfg.get('output_channel', cfg.DATASET.num_joints)

        self.deploy = deploy
        self.stem = Stem(width, ca_type=ca_type)
        # The attribute name (sic) is kept because it is part of the state_dict keys.
        self.backone = EncoderDecoder(num_stage, width, ca_type=ca_type)

        self.neck = nn.Sequential(
            RepBasicUnit(width, width, ca_type=ca_type),
            RepConv(width, width, 1, 1, 0),
        )
        self.head = nn.Conv2d(width, num_out, 1, 1, 0)
        self.init_weights()

    def forward(self, x):
        """Return the head output computed from the last decoder feature map."""
        decoder_outputs = self.backone(self.stem(x))
        features = self.neck(decoder_outputs[-1])
        return self.head(features)

    def init_weights(self):
        """Apply normal_init to Conv2d layers and constant_init(., 1) to norm layers."""
        norm_types = (nn.BatchNorm2d, nn.GroupNorm)
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                normal_init(module)
            elif isinstance(module, norm_types):
                constant_init(module, 1)

    def deploy_model(self):
        """Call ``switch_to_deploy()`` on every submodule that has it, then set the flag."""
        for module in self.modules():
            if hasattr(module, 'switch_to_deploy'):
                module.switch_to_deploy()
        self.deploy = True


# 1、LiteHandNet

In [10]:
# Benchmark LiteHandNet twice: as constructed, and again after deploy_model().
import addict
cfg = dict(MODEL=dict(
    num_stage=4,
    ca_type='ca',  # 'ca' | 'se' | 'none'
    input_channel=128,
    output_channel=21,
))
# Random 256x256 RGB-shaped input shared by all measurements in this cell.
x = torch.rand(1, 3, 256, 256)
# addict.Dict gives the attribute-style access (cfg.MODEL) the model expects.
net = LiteHandNet(addict.Dict(cfg))
show_macs_params(net, dummy_input=x)
inference_speed(net, dummy_input=x)
print()
# deploy_model() calls switch_to_deploy() on every submodule that defines it.
net.deploy_model()
show_macs_params(net, dummy_input=x)
inference_speed(net, dummy_input=x)

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

# 2、Hourglass

In [None]:
from torch import nn
import torch


# Pooling layer class used by HourglassModule and HourglassNet.
Pool = nn.MaxPool2d

def batchnorm(x):
    """Normalize ``x`` with a freshly created, untrained ``nn.BatchNorm2d``.

    A new layer is built on every call, so no statistics or affine parameters
    persist between calls. The layer is placed on the device of ``x`` and, for
    floating-point inputs, cast to the dtype of ``x``; a default CPU/float32
    layer would not accept inputs living elsewhere.

    Args:
        x: Tensor whose dimension 1 is the channel dimension.

    Returns:
        The output of the new layer (in its default training mode) on ``x``.
    """
    layer = nn.BatchNorm2d(x.size()[1]).to(device=x.device)
    if x.is_floating_point():
        layer = layer.to(dtype=x.dtype)
    return layer(x)


class Conv(nn.Module):
    """Conv2d with size-preserving padding plus optional BatchNorm and ReLU.

    Args:
        inp_dim: Number of input channels (checked in ``forward``).
        out_dim: Number of output channels.
        kernel_size: Kernel size; padding is ``(kernel_size - 1) // 2``.
        stride: Convolution stride.
        bn: Whether to append a ``BatchNorm2d``.
        relu: Whether to append a ``ReLU``.
    """

    def __init__(self, inp_dim, out_dim, kernel_size=3, stride = 1, bn = False, relu = True):
        super(Conv, self).__init__()
        self.inp_dim = inp_dim
        same_padding = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(inp_dim, out_dim, kernel_size, stride,
                              padding=same_padding, bias=True)
        # Disabled stages are stored as None and skipped in forward().
        self.relu = nn.ReLU() if relu else None
        self.bn = nn.BatchNorm2d(out_dim) if bn else None

    def forward(self, x):
        """Apply conv, then BatchNorm and ReLU when they are enabled."""
        assert x.size()[1] == self.inp_dim, "{} {}".format(x.size()[1], self.inp_dim)
        out = self.conv(x)
        for stage in (self.bn, self.relu):
            if stage is not None:
                out = stage(out)
        return out

class Residual(nn.Module):
    """Pre-activation bottleneck residual block (BN -> ReLU -> conv, three times).

    The main path is 1x1 -> 3x3 -> 1x1 with ``int(out_dim / 2)`` channels in
    the middle. The skip path is an identity when ``inp_dim == out_dim`` and a
    1x1 ``Conv`` otherwise.

    Args:
        inp_dim: Number of input channels.
        out_dim: Number of output channels.
    """

    def __init__(self, inp_dim, out_dim):
        super(Residual, self).__init__()
        half_dim = int(out_dim/2)
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(inp_dim)
        self.conv1 = Conv(inp_dim, half_dim, 1, relu=False)
        self.bn2 = nn.BatchNorm2d(half_dim)
        self.conv2 = Conv(half_dim, half_dim, 3, relu=False)
        self.bn3 = nn.BatchNorm2d(half_dim)
        self.conv3 = Conv(half_dim, out_dim, 1, relu=False)
        self.skip_layer = (nn.Identity() if inp_dim == out_dim
                           else Conv(inp_dim, out_dim, 1, relu=False))

    def forward(self, x):
        """Return ``main_path(x) + skip_layer(x)``."""
        shortcut = self.skip_layer(x)
        out = x
        stages = ((self.bn1, self.conv1),
                  (self.bn2, self.conv2),
                  (self.bn3, self.conv3))
        for norm, conv in stages:
            out = conv(self.relu(norm(out)))
        out += shortcut
        return out

class HourglassModule(nn.Module):
    """Recursive hourglass: a same-resolution branch plus a pooled lower branch.

    Args:
        n: Recursion depth; while ``n > 1`` the middle of the lower branch is
            another ``HourglassModule`` of depth ``n - 1``, otherwise it is a
            ``Residual``.
        f: Number of input/output channels.
        bn: Forwarded to nested modules; not otherwise used in this class.
        increase: Extra channels used inside the lower branch (``f + increase``).
    """

    def __init__(self, n, f, bn=None, increase=0):
        super(HourglassModule, self).__init__()
        nf = f + increase
        # Upper branch keeps the input resolution.
        self.up1 = Residual(f, f)
        # Lower branch: pool -> residual -> (nested hourglass | residual)
        #               -> residual -> 2x nearest upsample.
        self.pool1 = Pool(2, 2)
        self.low1 = Residual(f, nf)
        self.n = n
        self.low2 = (HourglassModule(n-1, nf, bn=bn) if self.n > 1
                     else Residual(nf, nf))
        self.low3 = Residual(nf, f)
        self.up2 = nn.Upsample(scale_factor=2, mode='nearest')

    def forward(self, x):
        """Return the sum of the upper branch and the upsampled lower branch."""
        upper = self.up1(x)
        lower = self.low1(self.pool1(x))
        lower = self.low3(self.low2(lower))
        return upper + self.up2(lower)

class Merge(nn.Module):
    """1x1 ``Conv`` without BatchNorm or ReLU, mapping ``x_dim`` to ``y_dim`` channels."""

    def __init__(self, x_dim, y_dim):
        super(Merge, self).__init__()
        self.conv = Conv(x_dim, y_dim, kernel_size=1, bn=False, relu=False)

    def forward(self, x):
        """Return the projected tensor."""
        projected = self.conv(x)
        return projected

class HourglassNet(nn.Module):
    """Stacked hourglass network.

    Reference implementation:
    https://github.com/princeton-vl/pytorch_stacked_hourglass

    Args:
        cfg: Config object exposing ``cfg.MODEL.get(...)`` for ``num_stack``,
            ``num_level``, ``input_channel`` and ``output_channel``.
    """

    def __init__(self, cfg):
        super(HourglassNet, self).__init__()
        model_cfg = cfg.MODEL
        num_stack = model_cfg.get('num_stack', 8)
        num_level = model_cfg.get('num_level', 4)
        inp_dim = model_cfg.get('input_channel', 256)
        oup_dim = model_cfg.get('output_channel', 21)

        self.num_stack = num_stack
        # Stem: strided 7x7 conv and one pooling step before the stacks.
        self.pre = nn.Sequential(
            Conv(3, 64, 7, 2, bn=True, relu=True),
            Residual(64, 128),
            Pool(2, 2),
            Residual(128, 128),
            Residual(128, inp_dim),
        )

        hourglasses = [
            nn.Sequential(HourglassModule(num_level, inp_dim, bn=False, increase=0))
            for _ in range(num_stack)
        ]
        self.hgs = nn.ModuleList(hourglasses)

        feature_heads = [
            nn.Sequential(
                Residual(inp_dim, inp_dim),
                Conv(inp_dim, inp_dim, 1, bn=True, relu=True),
            )
            for _ in range(num_stack)
        ]
        self.features = nn.ModuleList(feature_heads)

        self.outs = nn.ModuleList(
            [Conv(inp_dim, oup_dim, 1, relu=False, bn=False) for _ in range(num_stack)])
        # Projections that feed features/predictions of one stack into the next.
        self.merge_features = nn.ModuleList(
            [Merge(inp_dim, inp_dim) for _ in range(num_stack - 1)])
        self.merge_preds = nn.ModuleList(
            [Merge(oup_dim, inp_dim) for _ in range(num_stack - 1)])

    def forward(self, imgs):
        """Return the predictions of every stack, stacked along dimension 1."""
        x = self.pre(imgs)
        per_stack = []
        last = self.num_stack - 1
        for idx in range(self.num_stack):
            feature = self.features[idx](self.hgs[idx](x))
            preds = self.outs[idx](feature)
            per_stack.append(preds)
            if idx < last:
                # Input of the next stack: current input plus both projections.
                x = x + self.merge_preds[idx](preds) + self.merge_features[idx](feature)
        return torch.stack(per_stack, dim=1)  # [N, num_stack, K, H, W]



## 2.1 hourglass test

In [None]:
# Benchmark a single-stack HourglassNet on a random 256x256 input.
import addict
cfg = dict(MODEL=dict(
    num_stack=1,
    num_level=4,
    input_channel=256,
    output_channel=21,
))
x = torch.rand(1, 3, 256, 256)
# addict.Dict gives the attribute-style access (cfg.MODEL) the model expects.
net = HourglassNet(addict.Dict(cfg))
show_macs_params(net, dummy_input=x)
inference_speed(net, dummy_input=x)
print()


HourglassNet		macs=5202833408.0	params=3427733.0
FLOPs= 5.202833408G	params= 3.427733M
macs='5.203G'	params='3.428M'
Mean@ 9.872ms Std@ 0.411ms FPS@ 101.30



In [None]:
import torch
from torch import nn
from torch.nn import functional as F
from models import kaiming_init, constant_init, normal_init
from models.pose_estimation.liteHandNet.common import SEBlock, channel_shuffle, ChannelAttension
from models.pose_estimation.liteHandNet.repblocks import RepConv, RepBlock

class DWConv(nn.Module):
    """Depthwise-separable convolution: grouped 3x3 RepConv, then 1x1 RepConv.

    Args:
        in_channel: Number of input channels (also the group count of the 3x3).
        out_channel: Number of output channels of the 1x1 convolution.
        stride: Stride of the 3x3 convolution.
        padding: Padding of the 3x3 convolution.
        dilation: Dilation of the 3x3 convolution.
        activation: Activation passed to both RepConv layers.
    """

    def __init__(self, in_channel, out_channel, stride=1, padding=1, dilation=1,
                 activation=nn.LeakyReLU):
        super().__init__()
        shared = dict(activation=activation, inplace=False)
        self.depthwise_conv = RepConv(in_channel, in_channel, 3, stride, padding,
                                      groups=in_channel, dilation=dilation, **shared)
        self.pointwise_conv = RepConv(in_channel, out_channel, 1, 1, 0, **shared)

    def forward(self, x):
        """Return ``pointwise_conv(depthwise_conv(x))``."""
        return self.pointwise_conv(self.depthwise_conv(x))
 
class BottleNeck(nn.Module):
    """Residual bottleneck that adds depth at low cost and keeps the channel count.

    Args:
        channel: Number of input/output channels.
        reduction: Divisor giving the width of the middle layers.
        activation: Activation used by the first two layers; the last layer
            has none, and ``F.relu`` is applied after the residual sum.
    """

    def __init__(self, channel, reduction=4, activation=nn.LeakyReLU):
        super(BottleNeck, self).__init__()
        mid_channel = channel // reduction
        squeeze = RepConv(channel, mid_channel, 1, 1, 0,
                          activation=activation, inplace=True)
        spatial = RepBlock(mid_channel, mid_channel, 3, 1, 1,
                           activation=activation, inplace=True, identity=False)
        expand = RepConv(mid_channel, channel, 1, 1, 0,
                         activation=None)
        self.conv = nn.Sequential(squeeze, spatial, expand)

    def forward(self, x):
        """Return ``relu(x + conv(x))``."""
        summed = x + self.conv(x)
        return F.relu(summed)


class BasicBlock(nn.Module):
    """Two 3x3 layers with a skip connection, followed by ``F.relu``.

    Args:
        inp_dim: Number of input channels.
        out_dim: Number of output channels.
        stride: Stride of the first 3x3 layer (and of the skip projection).
        activation: Activation of the first layer; the second layer has none.
    """

    def __init__(self, inp_dim, out_dim, stride=1, activation=nn.LeakyReLU):
        super(BasicBlock, self).__init__()
        self.conv = nn.Sequential(
            RepConv(inp_dim, out_dim, 3, stride, 1,
                    activation=activation, inplace=True),
            # The second layer consumes the out_dim channels produced by the
            # first one, so its input width must be out_dim (the previous
            # inp_dim only worked when inp_dim == out_dim).
            RepBlock(out_dim, out_dim, 3, 1, 1,
                     activation=None, identity=False)
        )
        # A 1x1 projection is needed whenever the skip path would otherwise
        # differ from the main path in resolution or channel count.
        if stride == 2 or inp_dim != out_dim:
            self.skip_layer = RepConv(inp_dim, out_dim, 1, stride, 0, activation=None)
        else:
            self.skip_layer = nn.Identity()

    def forward(self, x):
        """Return ``relu(skip_layer(x) + conv(x))``."""
        return F.relu(self.skip_layer(x) + self.conv(x))


class Residual(nn.Module):
    """One ``BasicBlock`` followed by ``num_block`` ``BottleNeck`` blocks.

    Args:
        inp_dim: Number of input channels.
        out_dim: Number of output channels.
        stride: Stride passed to the ``BasicBlock``.
        num_block: Number of ``BottleNeck`` blocks appended.
        reduction: Reduction factor passed to each ``BottleNeck``.
        activation: Activation passed to all blocks.
    """

    def __init__(self, inp_dim, out_dim, stride=2, num_block=2,
                 reduction=2, activation=nn.LeakyReLU):
        super().__init__()
        self.conv1 = BasicBlock(inp_dim, out_dim, stride, activation)
        bottlenecks = [BottleNeck(out_dim, reduction, activation)
                       for _ in range(num_block)]
        self.blocks = nn.Sequential(*bottlenecks)

    def forward(self, x):
        """Return ``blocks(conv1(x))``."""
        return self.blocks(self.conv1(x))


class EncoderDecoder(nn.Module):
    """Encoder/decoder built from an MSAB plus ``num_levels - 1`` Residual stages per side.

    The encoder is ``[MSAB, Residual(stride=2) * (num_levels - 1)]`` and the
    decoder is ``[Residual(stride=1) * (num_levels - 1), MSAB]``.

    Args:
        num_levels: Number of encoder (and decoder) layers.
        inp_dim: Channel width used everywhere.
        num_blocks: Sequence with ``num_levels - 1`` entries; entry ``i`` is
            the bottleneck count of the i-th Residual on both sides. The
            default is an empty tuple (not a shared mutable list), which is
            only valid for ``num_levels == 1``.
        ca_type: Attention type passed to the two MSAB blocks.
        reduction: Reduction factor passed to the Residual stages.
        activation: Activation passed to the Residual stages.
    """

    def __init__(self, num_levels=5, inp_dim=128, num_blocks=(),
                 ca_type='ca', reduction=2, activation=nn.LeakyReLU):
        super().__init__()
        self.num_levels = num_levels
        self.encoder = nn.ModuleList([])
        self.decoder = nn.ModuleList([])
        assert len(num_blocks) == num_levels - 1, (
            f'num_blocks must have {num_levels - 1} entries '
            f'(num_levels - 1), got {len(num_blocks)}')

        self.encoder.append(MSAB(inp_dim, inp_dim, ca_type=ca_type))
        for i in range(num_levels-1):
            self.encoder.append(
                Residual(inp_dim, inp_dim, 2, num_blocks[i], reduction, activation))
            self.decoder.append(
                Residual(inp_dim, inp_dim, 1, num_blocks[i], reduction, activation))
        self.decoder.append(MSAB(inp_dim, inp_dim, ca_type=ca_type))

    def forward(self, x):
        """Return a tuple with every decoder output, deepest level first."""
        out_encoder = []   # encoder outputs, shallowest first
        out_decoder = []   # decoder outputs, deepest first

        # encoder
        for encoder_layer in self.encoder:
            x = encoder_layer(x)
            out_encoder.append(x)

        # Author's note: adding one simple long shortcut is considered enough.
        # The first encoder output is pooled down to the deepest resolution.
        h, w = out_encoder[-1].shape[2:]
        shortcut = F.adaptive_avg_pool2d(out_encoder[0], (h, w))

        # decoder
        for i, decoder_layer in enumerate(self.decoder):
            counterpart = out_encoder[self.num_levels-1-i]
            if i == 0:
                x = decoder_layer(counterpart)
                x = x + shortcut
            else:
                # Resize to the matching encoder output before merging.
                h, w = counterpart.shape[2:]
                x = decoder_layer(x)
                x = F.interpolate(x, size=(h, w))
                x = x + counterpart
            out_decoder.append(x)
        return tuple(out_decoder)


class MSAB(nn.Module):
    """Two-branch multi-scale block with a residual sum and channel attention.

    A 1x1 RepConv halves the channels; two rounds of parallel DWConv pairs
    (the second branch starting with a dilation-2 DWConv) are concatenated,
    which restores the input width after the second round. The block input is
    added, then a 1x1 RepConv and the selected channel attention follow.

    Background reading:
    https://blog.csdn.net/KevinZ5111/article/details/104730835

    Args:
        in_c: Number of input channels.
        out_c: Number of output channels.
        ca_type: ``'se'``, ``'ca'`` or ``'none'``.
        activation: Activation passed to the RepConv / DWConv layers.

    Raises:
        ValueError: If ``ca_type`` is not one of the three accepted values.
    """

    def __init__(self, in_c, out_c, ca_type='ca', activation=nn.LeakyReLU):
        super().__init__()

        mid_c = in_c // 2
        self.conv1 = RepConv(in_c, mid_c, 1, 1, 0, activation=activation, inplace=True)

        def dw_pair(width, **first_kwargs):
            # DWConv(mid_c -> width) followed by DWConv(width -> width);
            # extra keywords only apply to the first layer.
            return nn.Sequential(
                DWConv(mid_c, width, activation=activation, **first_kwargs),
                DWConv(width, width, activation=activation),
            )

        self.mid1_conv = nn.ModuleList([
            dw_pair(mid_c // 2),
            dw_pair(mid_c),
        ])
        self.mid2_conv = nn.ModuleList([
            dw_pair(mid_c // 2, dilation=2, padding=2),
            dw_pair(mid_c, dilation=2, padding=2),
        ])

        self.conv2 = RepConv(in_c, out_c, 1, 1, 0, activation=activation, inplace=True)

        if ca_type == 'none':
            attention = nn.Identity()
        elif ca_type == 'ca':
            attention = ChannelAttension(out_c)
        elif ca_type == 'se':
            attention = SEBlock(out_c, internal_neurons=out_c // 16)
        else:
            raise ValueError(f'<{ca_type=}> not in se|ca|none')
        self.ca = attention

    def forward(self, x):
        """Return ``ca(conv2(branches(conv1(x)) + x))``."""
        mixed = self.conv1(x)
        for plain, dilated in zip(self.mid1_conv, self.mid2_conv):
            mixed = torch.cat([plain(mixed), dilated(mixed)], dim=1)

        return self.ca(self.conv2(mixed + x))


class Stem(nn.Module):
    """Input stem: strided RepBlock pair, two downsampling branches, 1x1 fusion.

    Author's note: an extra convolution was added to ``conv1`` to enlarge the
    stem's initial receptive field (the second layer built here uses kernel
    size 7).

    Args:
        out_channel: Number of output channels.
        min_mid_c: Lower bound for the internal width ``out_channel // 4``.
        activation: Activation passed to the RepBlock / RepConv layers.
    """

    def __init__(self, out_channel=256, min_mid_c=32, activation=nn.LeakyReLU):
        super().__init__()
        mid_channel = max(out_channel // 4, min_mid_c)
        act_kwargs = dict(activation=activation, inplace=True)

        self.conv1 = nn.Sequential(
            RepBlock(3, mid_channel, 3, 2, 1, **act_kwargs),
            RepBlock(mid_channel, mid_channel, 7, 1, 3, groups=mid_channel,
                     **act_kwargs),
        )
        # Branch 1 downsamples with a strided 3x3 conv, branch 2 with max pooling.
        self.branch1 = nn.Sequential(
            RepConv(mid_channel, mid_channel, 1, 1, 0, **act_kwargs),
            RepConv(mid_channel, mid_channel, 3, 2, 1, **act_kwargs),
        )
        self.branch2 = nn.MaxPool2d(2, 2, ceil_mode=True)
        self.conv1x1 = nn.Conv2d(mid_channel * 2, out_channel, 1, 1, 0)

    def forward(self, x):
        """Return the 1x1 fusion of both downsampling branches."""
        shared = self.conv1(x)
        branches = [self.branch1(shared), self.branch2(shared)]
        return self.conv1x1(torch.cat(branches, dim=1))


class LiteHandNet(nn.Module):
    """Stem -> EncoderDecoder -> feature block -> 1x1 output layer.

    Args:
        cfg: Config object exposing ``cfg.MODEL.get(...)`` for ``num_stage``,
            ``input_channel``, ``output_channel``, ``num_block``, ``ca_type``,
            ``reduction`` and ``activation``. ``cfg.DATASET.num_joints`` is
            read as the default for ``output_channel``.

    Raises:
        AssertionError: If ``reduction`` is not 2 or 4, or ``ca_type`` is not
            one of ``'ca'``, ``'se'``, ``'none'``.
    """

    def __init__(self, cfg):
        super().__init__()
        num_stage=cfg.MODEL.get('num_stage', 4)
        inp_dim=cfg.MODEL.get('input_channel', 128)
        oup_dim=cfg.MODEL.get('output_channel', cfg.DATASET.num_joints)
        num_block=cfg.MODEL.get('num_block', [2, 2, 2])
        ca_type = cfg.MODEL.get('ca_type', 'ca')  # 'ca' | 'se' | 'none'
        reduction = cfg.MODEL.get('reduction', 2)
        activation = cfg.MODEL.get('activation', nn.LeakyReLU)
        assert reduction in [2, 4]
        assert ca_type in ['ca', 'se', 'none']

        # deploy_model() flips this flag; define it up front so the attribute
        # exists (and reads False) before deploy_model() has been called.
        self.deploy = False

        self.pre = Stem(inp_dim, activation=activation)
        self.hgs = EncoderDecoder(num_stage, inp_dim, num_block,
                                  ca_type, reduction, activation)

        self.features = nn.Sequential(
                BottleNeck(inp_dim, 2, activation),
                RepConv(inp_dim, inp_dim, 1, 1, 0, activation=activation, inplace=True),
            )

        self.out_layer = nn.Conv2d(inp_dim, oup_dim, 1, 1, 0)
        self.init_weights()

    def forward(self, imgs):
        """Return the output-layer result computed from the last decoder feature map."""
        x = self.pre(imgs)
        hg = self.hgs(x)
        feature = self.features(hg[-1])
        preds = self.out_layer(feature)
        return preds

    def init_weights(self):
        """Call ``normal_init`` on every module returned by ``self.modules()``."""
        # NOTE(review): this includes containers and norm layers, not only
        # convolutions -- confirm normal_init handles modules without weights.
        for m in self.modules():
            normal_init(m)

    def deploy_model(self):
        """Call ``switch_to_deploy()`` on every submodule that has it, then set the flag."""
        for m in self.modules():
            if hasattr(m, 'switch_to_deploy'):
                m.switch_to_deploy()
        self.deploy = True

# 3、MyNet

In [6]:
# Benchmark the model built by models.mynet on a random 256x256 input.
import addict
from models import mynet
stage = 4
cfg = dict(MODEL=dict(
    num_stage=stage,
    num_block=[2, 3, 4],
    input_channel=128,
    ca_type='ca',
    reduction=4,
    activation=nn.LeakyReLU,
    output_channel=21,
))
x = torch.rand(1, 3, 256, 256)
# addict.Dict gives attribute-style access (cfg.MODEL) on the nested dict.
net = mynet(addict.Dict(cfg))
show_macs_params(net, dummy_input=x)
inference_speed(net, dummy_input=x)
print()

# net = LiteHandNet(addict.Dict(cfg))
# net.eval()
# y = net(x)
# show_macs_params(net, dummy_input=x)
# inference_speed(net, dummy_input=x)
# print()

# net.deploy_model()
# show_macs_params(net, dummy_input=x)
# inference_speed(net, dummy_input=x)
# print()

MultiScaleAttentionHourglass		macs=1142708096.0	params=2294357.0
FLOPs= 1.142708096G	params= 2.294357M
macs='1.143G'	params='2.294M'
Mean@ 20.757ms Std@ 2.145ms FPS@ 48.18

