In [5]:
import torch.nn as nn
import numpy as np
import torch
import copy
from thop import profile
from thop import clever_format
import torch.nn.functional as F
import math

def show_macs_params(model, img_size=(256, 256), dummy_input=None):
    """Print the multiply-accumulate (MAC) count and parameter count of ``model``.

    The model is moved to CUDA when available (CPU otherwise) and switched to
    eval mode; both changes persist after the call.

    Args:
        model: ``nn.Module`` to profile.
        img_size: ``(height, width)`` of the random 3-channel input that is
            generated when ``dummy_input`` is not given.
        dummy_input: Optional tensor fed to the model instead of the random
            image. It is moved to the selected device.

    Returns:
        None. Three lines are printed: the raw counts, the counts scaled to
        G (1e9) / M (1e6), and thop's ``clever_format`` rendering.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    if dummy_input is None:
        dummy_input = torch.randn(1, 3, img_size[0], img_size[1], dtype=torch.float)
    dummy_input = dummy_input.to(device)

    # thop counts multiply-accumulate operations (MACs), not FLOPs; one MAC is
    # commonly counted as two FLOPs. The "G" suffix below means 1e9.
    macs, params = profile(model, inputs=(dummy_input,), verbose=False)
    print(f"{model._get_name()}\t\t{macs=}\t{params=}")
    # This line used to be labelled "FLOPs=" although the value is the MAC count.
    print(f"MACs= {macs / 1e9}G", end='\t')
    print(f"params= {params / 1e6}M")
    macs, params = clever_format([macs, params], "%.3f")
    print(f"{macs=}\t{params=}")


def inference_speed(model, img_size=(256, 256), dummy_input=None,
                    repetitions=1000, warmup=10):
    """Measure and print the mean forward-pass latency of ``model``.

    The model is moved to CUDA when available (CPU otherwise) and switched to
    eval mode; both changes persist after the call. On CUDA the latency is
    measured with CUDA events; on CPU it is measured with
    ``time.perf_counter`` so the function also works without a GPU.

    Args:
        model: ``nn.Module`` to benchmark.
        img_size: ``(height, width)`` of the random 3-channel input that is
            generated when ``dummy_input`` is not given.
        dummy_input: Optional tensor fed to the model instead of the random
            image. It is moved to the selected device.
        repetitions: Number of timed forward passes (must be >= 1).
        warmup: Number of untimed forward passes run before measuring.

    Returns:
        None. One line is printed with the mean and standard deviation of the
        per-pass latency in milliseconds and the resulting frames per second.

    Raises:
        ValueError: If ``repetitions`` is smaller than 1.
    """
    import time  # local import: only needed for the CPU timing fallback

    if repetitions < 1:
        raise ValueError(f"repetitions must be >= 1, got {repetitions}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    if dummy_input is None:
        dummy_input = torch.randn(1, 3, img_size[0], img_size[1], dtype=torch.float)
    dummy_input = dummy_input.to(device)

    on_cuda = device.type == "cuda"
    timings = np.zeros((repetitions, 1))

    # Warm-up and measurement both run without autograd so no graph is built.
    with torch.no_grad():
        for _ in range(warmup):
            _ = model(dummy_input)

        if on_cuda:
            # Make sure the warm-up kernels are finished before timing starts.
            torch.cuda.synchronize()
            starter = torch.cuda.Event(enable_timing=True)
            ender = torch.cuda.Event(enable_timing=True)
            for rep in range(repetitions):
                starter.record()
                _ = model(dummy_input)
                ender.record()
                # Wait for the GPU so that elapsed_time() is valid.
                torch.cuda.synchronize()
                timings[rep] = starter.elapsed_time(ender)  # milliseconds
        else:
            for rep in range(repetitions):
                tic = time.perf_counter()
                _ = model(dummy_input)
                timings[rep] = (time.perf_counter() - tic) * 1000.0  # s -> ms

    mean_syn = np.sum(timings) / repetitions
    std_syn = np.std(timings)
    mean_fps = 1000. / mean_syn
    print('Mean@ {mean_syn:.3f}ms Std@ {std_syn:.3f}ms FPS@ {mean_fps:.2f}'
          .format(mean_syn=mean_syn, std_syn=std_syn, mean_fps=mean_fps))
    # Open question left by the original author: what does the `n` in the
    # "@n" labels of the alternative format ("Mean@1 ... Std@5 ... FPS@1") mean?

# 测试LiteHandNet

In [3]:
import addict
from models import litehandnet
# Configuration passed to litehandnet().
cfg = dict(MODEL=dict(
    num_stage=4,
    num_stack=1,
    msrb_ca='ca',  # 'ca' | 'se' | 'none'
    rbu_ca='ca',  # 'ca' | 'se' | 'none'
    input_channel=256,
    output_channel=21,
))

cfg = addict.Dict(cfg)
model = litehandnet(cfg)

show_macs_params(model, img_size=(256, 256))
inference_speed(model, img_size=(256, 256))
# The helpers above move the model to CUDA when available and to CPU otherwise,
# so build the probe input on the model's own device instead of hard-coding
# GPU index 0.
device = next(model.parameters()).device
y = model(torch.rand(1, 3, 256, 256, device=device))
print(y.shape)

# Deployed (inference-time) model
model.deploy_model()
show_macs_params(model, img_size=(256, 256))
inference_speed(model, img_size=(256, 256))
# Re-read the device: deploy_model() may have replaced the parameters.
device = next(model.parameters()).device
y = model(torch.rand(1, 3, 256, 256, device=device))
print(y.shape)

TypeError: __init__() got an unexpected keyword argument 'identity'

In [None]:
from models.pose_estimation.liteHandNet.repblocks import RepConv
from models.pose_estimation.liteHandNet.common import SEBlock, ChannelAttension, channel_shuffle

class Stem(nn.Module):
    """Input stem built from RepConv blocks.

    A two-layer entry stage feeds two parallel branches (a RepConv path and a
    2x2 max-pool with stride 2); their outputs are concatenated along the
    channel axis and fused by a final RepConv into ``channel`` channels.

    NOTE(review): the positional RepConv arguments are presumably
    (in, out, kernel, stride, padding) -- confirm against RepConv's signature.

    Args:
        channel: Number of output channels. The internal width is
            ``max(channel // 4, 32)``.
    """

    def __init__(self, channel):
        super().__init__()
        width = max(channel // 4, 32)

        # Entry stage applied to the 3-channel input.
        entry_layers = [
            RepConv(3, width, 3, 2, 1),
            RepConv(width, width, 3, 1, 1, groups=width),
        ]
        self.conv1 = nn.Sequential(*entry_layers)

        # Convolutional branch; the grouped middle layer has no activation.
        conv_path = [
            RepConv(width, width, 1, 1, 0),
            RepConv(width, width, 3, 2, 1, groups=width, activation=None),
            RepConv(width, width, 1, 1, 0),
        ]
        self.branch1 = nn.Sequential(*conv_path)

        # Pooling branch; ceil_mode rounds the output size up.
        self.branch2 = nn.MaxPool2d(2, 2, ceil_mode=True)

        # Fuses the concatenated branches (2 * width channels) into `channel`.
        self.conv2 = RepConv(2 * width, channel, 1, 1, 0)

    def forward(self, x):
        """Run the entry stage, both branches, and the fusing convolution."""
        stem_feat = self.conv1(x)
        branch_outputs = [self.branch1(stem_feat), self.branch2(stem_feat)]
        return self.conv2(torch.cat(branch_outputs, dim=1))

# Baseline stem for comparison: a single 4x4 convolution with stride 4
# followed by batch normalisation.
Stem4x4 = nn.Sequential(
    nn.Conv2d(3, 256, kernel_size=4, stride=4),
    nn.BatchNorm2d(256),
)

stem1, stem2 = Stem(256), Stem4x4

# Profile both stems with the default 256x256 input; a blank line separates
# the two reports.
for position, candidate in enumerate((stem1, stem2)):
    if position:
        print()
    show_macs_params(candidate)
    inference_speed(candidate)

Stem		macs=272105472.0	params=44992.0
FLOPs= 0.272105472G	params= 0.044992M
macs='272.105M'	params='44.992K'
Mean@ 1.073ms Std@ 0.158ms FPS@ 931.71

Sequential		macs=53477376.0	params=13056.0
FLOPs= 0.053477376G	params= 0.013056M
macs='53.477M'	params='13.056K'
Mean@ 0.202ms Std@ 0.061ms FPS@ 4941.16


In [None]:
class RepBasicUnit(nn.Module):
    """Split-transform-concatenate unit with optional channel attention.

    The first ``in_channels // 2`` channels pass through unchanged; the
    remaining channels go through two RepConv layers. Both parts are
    concatenated on the channel axis and passed to the attention module.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the concatenated output.
        ca_type: ``'se'`` (SEBlock), ``'ca'`` (ChannelAttension) or ``'none'``
            (identity).

    Raises:
        ValueError: If ``ca_type`` is not one of the three accepted values.
    """

    def __init__(self, in_channels, out_channels, ca_type='ca'):
        super().__init__()
        passthrough = in_channels // 2
        self.left_part = passthrough
        self.right_part_in = in_channels - passthrough
        self.right_part_out = out_channels - passthrough

        # Transform path: kernel-1 RepConv with ReLU6, then a grouped
        # kernel-3 RepConv without activation.
        expand = RepConv(self.right_part_in, self.right_part_out,
                         kernel=1, activation=nn.ReLU6)
        spatial = RepConv(self.right_part_out, self.right_part_out,
                          kernel=3, padding=1,
                          groups=self.right_part_out, activation=None)
        self.conv = nn.Sequential(expand, spatial)

        # Guard clause first, then pick the attention module.
        if ca_type not in ('se', 'ca', 'none'):
            raise ValueError(f'<{ca_type=}> not in se|ca|none')
        if ca_type == 'none':
            self.ca = nn.Identity()
        elif ca_type == 'ca':
            self.ca = ChannelAttension(out_channels)
        else:
            self.ca = SEBlock(out_channels, internal_neurons=out_channels // 16)

    def forward(self, x):
        """Transform the right channel slice and re-attach the untouched left slice."""
        split_at = self.left_part
        untouched = x[:, :split_at, :, :]
        transformed = self.conv(x[:, split_at:, :, :])
        return self.ca(torch.cat((untouched, transformed), 1))

# Profile RepBasicUnit; each helper receives its own freshly drawn random
# 256-channel 64x64 input.
net = RepBasicUnit(256, 256)
for benchmark in (show_macs_params, inference_speed):
    benchmark(net, dummy_input=torch.rand(1, 256, 64, 64))

RepBasicUnit		macs=73960576.0	params=53952.0
FLOPs= 0.073960576G	params= 0.053952M
macs='73.961M'	params='53.952K'
Mean@ 0.752ms Std@ 0.066ms FPS@ 1330.52


In [None]:
class DWConv_ELAN(nn.Module):
    """ELAN-style aggregation block built from grouped RepConv layers.

    The first ``in_channel // 2`` input channels go through two stacked
    RepConv pairs; the input and both intermediate results are concatenated,
    fused by a 1x1 convolution, and channel-shuffled.

    NOTE(review): each RepConv uses ``groups == channels``, so it is presumably
    a depthwise 3x3 convolution -- confirm against RepConv's signature. An
    earlier variant (disabled in the original) inserted
    ``RepConv(mid_channel, mid_channel, 1, 1, 0)`` after each grouped layer.

    Args:
        in_channel: Channels of the input tensor.
        out_channel: Channels produced by the fusing 1x1 convolution.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        mid_channel = in_channel // 2
        self.conv1 = nn.Sequential(
            RepConv(mid_channel, mid_channel, 3, 1, 1, groups=mid_channel, activation=None),
            RepConv(mid_channel, mid_channel, 3, 1, 1, groups=mid_channel, activation=None),
        )
        self.conv2 = nn.Sequential(
            RepConv(mid_channel, mid_channel, 3, 1, 1, groups=mid_channel, activation=None),
            RepConv(mid_channel, mid_channel, 3, 1, 1, groups=mid_channel, activation=None),
        )
        # forward() concatenates x (in_channel) with two mid_channel-wide
        # tensors. Sizing the fusion from that sum equals the former
        # 4 * mid_channel for even in_channel and stays correct for odd widths.
        self.conv3 = nn.Conv2d(in_channel + 2 * mid_channel, out_channel, 1, 1, 0)
        self.c = mid_channel

    def forward(self, x):
        """Aggregate the input with both intermediate feature maps and shuffle channels."""
        out1 = self.conv1(x[:, :self.c, :, :])
        out2 = self.conv2(out1)
        out = self.conv3(torch.cat([x, out1, out2], dim=1))
        out = channel_shuffle(out, groups=2)
        return out

# Profile DWConv_ELAN on one shared random 256-channel 64x64 input.
x = torch.rand(1, 256, 64, 64)
net = DWConv_ELAN(256, 256)
for benchmark in (show_macs_params, inference_speed):
    benchmark(net, dummy_input=x)

DWConv_ELAN		macs=560988160.0	params=136960.0
FLOPs= 0.56098816G	params= 0.13696M
macs='560.988M'	params='136.960K'
Mean@ 0.606ms Std@ 0.162ms FPS@ 1649.98


In [None]:
# Profile ChannelAttension (built with deploy=True) on one shared random
# 256-channel 32x32 input.
net = ChannelAttension(256, deploy=True)
x = torch.rand(1, 256, 32, 32)
for benchmark in (show_macs_params, inference_speed):
    benchmark(net, dummy_input=x)

ChannelAttension		macs=35712.0	params=35648.0
FLOPs= 3.5712e-05G	params= 0.035648M
macs='35.712K'	params='35.648K'
Mean@ 0.316ms Std@ 0.082ms FPS@ 3160.25


In [None]:
# Profile a single RepConv (built with deploy=True) on one shared random
# 256-channel 32x32 input.
net = RepConv(256, 256, 1, 1, 0, deploy=True)
x = torch.rand(1, 256, 32, 32)
for benchmark in (show_macs_params, inference_speed):
    benchmark(net, dummy_input=x)

RepConv		macs=67633152.0	params=65792.0
FLOPs= 0.067633152G	params= 0.065792M
macs='67.633M'	params='65.792K'
Mean@ 0.107ms Std@ 0.078ms FPS@ 9369.75


MSRB

In [None]:
class MSRB(nn.Module):
    """Two-step, two-branch residual block with optional channel attention.

    At each of the two steps the running feature map is split into two halves
    along the channel axis; each half goes through its own grouped RepConv,
    the halves are concatenated, passed through the attention module, and
    added to the running feature map. A final RepConv maps the result (plus
    the block input) to ``out_channels``.

    NOTE(review): branch2 passes two extra positional arguments ``(2, 2)`` to
    RepConv, presumably padding 2 / dilation 2 -- confirm against RepConv's
    signature. ``in_channels`` is assumed to be even so both halves match
    ``in_channels // 2``.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by the final RepConv.
        ca_type: ``'se'`` (SEBlock), ``'ca'`` (ChannelAttension); any other
            value selects identity (no attention).
    """

    def __init__(self, in_channels, out_channels, ca_type='none'):
        super().__init__()
        self.half_channels = in_channels // 2
        self.branch1 = nn.ModuleList([
            RepConv(self.half_channels, self.half_channels, 3, 1, 1,
                    groups=self.half_channels, activation=None),
            RepConv(self.half_channels, self.half_channels, 3, 1, 1,
                    groups=self.half_channels, activation=None)
        ])
        self.branch2 = nn.ModuleList([
            RepConv(self.half_channels, self.half_channels, 3, 1, 2, 2,
                    groups=self.half_channels, activation=None),
            RepConv(self.half_channels, self.half_channels, 3, 1, 2, 2,
                    groups=self.half_channels, activation=None)
        ])

        # Information exchange via channel attention. The attention modules
        # act on the concatenated halves, which are in_channels wide (the
        # projection to out_channels only happens in self.conv afterwards), so
        # they are sized with in_channels. This equals the former out_channels
        # sizing whenever in_channels == out_channels.
        if ca_type == 'se':
            self.ca = nn.ModuleList([
                SEBlock(in_channels, internal_neurons=in_channels // 16),
                SEBlock(in_channels, internal_neurons=in_channels // 16)])
        elif ca_type == 'ca':
            self.ca = nn.ModuleList([ChannelAttension(in_channels),
                                     ChannelAttension(in_channels)])
        else:
            self.ca = nn.ModuleList([nn.Identity(), nn.Identity()])

        self.conv = RepConv(in_channels, out_channels, 1, 1, 0)

    def forward(self, x):
        """Apply both residual steps, then project ``out + x`` to ``out_channels``."""
        out = x
        for _b1, _b2, _ca in zip(self.branch1, self.branch2, self.ca):
            left, right = torch.chunk(out, 2, dim=1)
            left = _b1(left)
            right = _b2(right)
            out = out + _ca(torch.cat([left, right], dim=1))
        return self.conv(out + x)

# Profile MSRB with ChannelAttension on one shared random 256-channel
# 64x64 input.
net = MSRB(256, 256, 'ca')
x = torch.rand(1, 256, 64, 64)
for benchmark in (show_macs_params, inference_speed):
    benchmark(net, dummy_input=x)

MSRB		macs=294721792.0	params=143488.0
FLOPs= 0.294721792G	params= 0.143488M
macs='294.722M'	params='143.488K'
Mean@ 1.481ms Std@ 0.108ms FPS@ 675.04


测试 hourglass_ablation

In [6]:
from models import hourglass_ablation
import addict
# Build the hourglass_ablation configuration as an attribute-accessible dict.
cfg = addict.Dict(dict(MODEL=dict(
    name='hourglass_ablation',
    input_channel=128,
    output_channel=21,       # num_joints + 3 region map
    num_stage=4,             # number of feature levels (scales) in each hourglass module
    num_block=[2, 2, 2],
    msrb=True,
    rca=True,
    ca_type='ca',
)))
net = hourglass_ablation(cfg)

# Profile the network on one shared random 3-channel 256x256 input.
x = torch.rand(1, 3, 256, 256)
for benchmark in (show_macs_params, inference_speed):
    benchmark(net, dummy_input=x)

hourglass_ablation		macs=1136115584.0	params=2348693.0
FLOPs= 1.136115584G	params= 2.348693M
macs='1.136G'	params='2.349M'
Mean@ 20.487ms Std@ 1.998ms FPS@ 48.81
