In [1]:
import os
import torch
import torch.nn as nn

from profiler import Profiler
from nas.models.with_mobilenet import PoseEstimationWithMobileNet
from mobilenetv2 import MobileNetV2

# Ask the CUDA runtime to launch kernels synchronously, presumably so that the
# host-side timings taken by the Profiler cover the actual GPU work instead of
# only the asynchronous launch.
# NOTE(review): this is assigned after `import torch`; it presumably still takes
# effect because nothing above touches the GPU yet — confirm.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [2]:
# Seed PyTorch's default random generator so the random input tensors created
# with torch.randn in the cells below are repeatable from run to run.
torch.manual_seed(0)

<torch._C.Generator at 0x19836b1c198>

In [3]:
# Instantiate the pose-estimation network with its default constructor
# arguments and move it to the GPU.
# NOTE(review): no .eval() call is visible in this notebook, so the network is
# presumably profiled in whatever mode the constructor leaves it in — confirm
# that this is the intended mode for the timings.
mnv1 = PoseEstimationWithMobileNet().cuda()

In [4]:
# Total number of elements across all parameters of the network.
sum(p.numel() for p in mnv1.parameters())

4091698

In [5]:
# Profile the backbone (`mnv1.model`) with the project Profiler: run 100
# forward passes on one random tensor of shape (4, 3, 128, 128), then
# aggregate and print the per-entry timing table.
profiler = Profiler(mnv1.model, "backbone")
a = torch.randn((4, 3, 128, 128)).cuda()
# NOTE(review): there is no warm-up pass before the timed loop. The very large
# std reported for entry 0 in the output of this cell suggests a one-time
# start-up cost is included in the statistics — confirm.
for _ in range(100):
    profiler.forward(a)
profiler.collect_data()
profiler.show_table()
# Kept for the whole-network summary computed in a later cell.
total_backbone = profiler.total_time

backbone
0               mean: 5.90    ms, 	std: 53.38   ms, 	percent: 30.05  %
1               mean: 1.46    ms, 	std: 1.78    ms, 	percent: 7.42   %
2               mean: 1.00    ms, 	std: 0.19    ms, 	percent: 5.10   %
3               mean: 1.12    ms, 	std: 0.32    ms, 	percent: 5.72   %
4               mean: 0.89    ms, 	std: 0.28    ms, 	percent: 4.55   %
5               mean: 0.95    ms, 	std: 0.20    ms, 	percent: 4.86   %
6               mean: 1.12    ms, 	std: 0.21    ms, 	percent: 5.71   %
7               mean: 1.42    ms, 	std: 0.21    ms, 	percent: 7.24   %
8               mean: 1.45    ms, 	std: 0.32    ms, 	percent: 7.40   %
9               mean: 1.43    ms, 	std: 0.22    ms, 	percent: 7.27   %
10              mean: 1.45    ms, 	std: 0.32    ms, 	percent: 7.38   %
11              mean: 1.43    ms, 	std: 0.21    ms, 	percent: 7.29   %
Total time: 19.65 ms


In [6]:
# Profile the `cpm` block of the network in isolation: 100 forward passes on
# one random tensor of shape (4, 512, 16, 16), then aggregate and print the
# timing table.
# NOTE(review): the input shape is hard-coded; presumably it matches what the
# backbone produces for the (4, 3, 128, 128) input used earlier — confirm.
profiler = Profiler(mnv1.cpm, "cpm")
a = torch.randn((4, 512, 16, 16)).cuda()
for _ in range(100):
    profiler.forward(a)
profiler.collect_data()
profiler.show_table()
# Kept for the whole-network summary computed in a later cell.
total_cpm = profiler.total_time

cpm
align           mean: 0.38    ms, 	std: 0.07    ms, 	percent: 9.68   %
trunk           mean: 2.62    ms, 	std: 7.36    ms, 	percent: 66.16  %
conv            mean: 0.96    ms, 	std: 4.67    ms, 	percent: 24.16  %
Total time: 3.95 ms


In [7]:
# Profile the `initial_stage` block in isolation: 100 forward passes on one
# random tensor of shape (4, 128, 16, 16), then aggregate and print the
# timing table.
# NOTE(review): the input shape is hard-coded; presumably it matches the
# output of the `cpm` block — confirm.
profiler = Profiler(mnv1.initial_stage, "initial_stage")
a = torch.randn((4, 128, 16, 16)).cuda()
for _ in range(100):
    profiler.forward(a)
profiler.collect_data()
profiler.show_table()
# Kept for the whole-network summary computed in a later cell.
total_init_stage = profiler.total_time

initial_stage
trunk           mean: 1.52    ms, 	std: 0.29    ms, 	percent: 50.03  %
heatmaps        mean: 0.75    ms, 	std: 0.15    ms, 	percent: 24.64  %
pafs            mean: 0.77    ms, 	std: 0.17    ms, 	percent: 25.33  %
Total time: 3.03 ms


In [8]:
# Profile one refinement stage in isolation: the module stored under key "0"
# of `refinement_stages._modules`. Runs 100 forward passes on one random
# tensor of shape (4, 185, 16, 16), then aggregates and prints the timing table.
# NOTE(review): only the entry under key "0" is measured; if the network holds
# further refinement stages they are not timed here — confirm.
# NOTE(review): the 185 input channels are hard-coded; presumably the cpm
# features concatenated with the previous stage's outputs — confirm.
profiler = Profiler(mnv1.refinement_stages._modules["0"], "refinement stage")
a = torch.randn((4, 185, 16, 16)).cuda()
for _ in range(100):
    profiler.forward(a)
profiler.collect_data()
profiler.show_table()
# Kept for the whole-network summary computed in a later cell.
total_refine_stage = profiler.total_time

refinement stage
trunk           mean: 11.24   ms, 	std: 9.11    ms, 	percent: 85.46  %
heatmaps        mean: 0.95    ms, 	std: 0.11    ms, 	percent: 7.23   %
pafs            mean: 0.96    ms, 	std: 0.21    ms, 	percent: 7.31   %
Total time: 13.16 ms


In [9]:
def info(name, block_time, total_time):
    """Format a one-line timing summary for a single profiled block.

    Args:
        name: Label of the block; left-aligned in a 20-character column.
        block_time: Time attributed to the block. It is multiplied by 1000
            and labelled "ms" in the resulting text.
        total_time: Time of the whole network in the same unit as
            ``block_time``; the denominator of the reported share.

    Returns:
        A string holding the scaled block time, the block's fraction of
        ``total_time`` and that same fraction expressed as a percentage.
    """
    share = block_time / total_time
    scaled_time = block_time * 1000
    share_percent = share * 100
    return (
        f"{name:<20} total: {scaled_time:<8.4f} ms, "
        f"relative: {share:.4f}, percent: {share_percent:.4f}%"
    )

# Whole-network time: the sum of the four per-block totals recorded by the
# profiling cells (backbone, cpm, initial stage, one refinement stage).
total = total_backbone + total_cpm + total_init_stage + total_refine_stage
print(f"Total net time: {total * 1000:.4f} ms")
# One summary line per block, in the order the blocks were profiled.
print(
    *(
        info(label, elapsed, total)
        for label, elapsed in (
            ("backbone", total_backbone),
            ("cpm", total_cpm),
            ("initial stage", total_init_stage),
            ("refinement stage", total_refine_stage),
        )
    ),
    sep="\n",
)

Total net time: 39.7895 ms
backbone             total: 19.6475  ms, relative: 0.4938, percent: 49.3786%
cpm                  total: 3.9528   ms, relative: 0.0993, percent: 9.9342%
initial stage        total: 3.0342   ms, relative: 0.0763, percent: 7.6257%
refinement stage     total: 13.1550  ms, relative: 0.3306, percent: 33.0615%


In [3]:
# Network configuration: one row per entry of the MobileNetV2 block setting.
# NOTE(review): the column letters are kept from the original header;
# presumably t = expansion factor, c = output channels, n = repeats,
# s = stride, p = padding, d = dilation — confirm against MobileNetV2.
inverted_residual_setting = [
    # t, c, n, s, p, d
    [1, 64, 1, 1, 1, 1],
    [1, 128, 1, 2, 1, 1],
    [1, 128, 1, 1, 1, 1],
    [1, 256, 1, 2, 1, 1],
    [1, 256, 1, 1, 1, 1],
    [1, 512, 1, 1, 1, 1],
    [1, 512, 1, 1, 2, 2],
    [1, 512, 4, 1, 1, 1],
]
# Build the alternative backbone from the table above and move it to the GPU.
mnv2 = MobileNetV2(
    in_channels=32,
    last_channels=512,
    inverted_residual_setting=inverted_residual_setting,
).cuda()

In [11]:
# Sanity check: feed the same random (1, 3, 128, 128) tensor to the original
# backbone (`mnv1.model`) and to `mnv2`, and compare the shapes of their
# outputs. The comparison result is the value displayed by this cell.
b = torch.randn((1, 3, 128, 128)).cuda()
mnv1.model(b).shape == mnv2(b).shape

True

In [4]:
# Total number of elements across all parameters of the mnv2 backbone.
sum(p.numel() for p in mnv2.parameters())

1613120

In [5]:
# Profile the mnv2 backbone (`mnv2.model`) the same way as the other blocks:
# 100 forward passes on one random tensor of shape (4, 3, 128, 128), then
# aggregate and print the per-entry timing table.
profiler = Profiler(mnv2.model, "mnv2 backbone")
a = torch.randn((4, 3, 128, 128)).cuda()
# NOTE(review): there is no warm-up pass before the timed loop. The very large
# std reported for entry 0 in the output of this cell suggests a one-time
# start-up cost is included in the statistics — confirm.
for _ in range(100):
    profiler.forward(a)
profiler.collect_data()
profiler.show_table()
# Total time of the mnv2 backbone as reported by the profiler.
total_mnv2_backbone = profiler.total_time

mnv2 backbone
0               mean: 5.92    ms, 	std: 53.73   ms, 	percent: 18.90  %
1               mean: 2.46    ms, 	std: 2.15    ms, 	percent: 7.85   %
2               mean: 1.78    ms, 	std: 0.34    ms, 	percent: 5.69   %
3               mean: 2.13    ms, 	std: 0.40    ms, 	percent: 6.81   %
4               mean: 1.56    ms, 	std: 0.27    ms, 	percent: 4.97   %
5               mean: 1.86    ms, 	std: 0.39    ms, 	percent: 5.92   %
6               mean: 2.00    ms, 	std: 0.33    ms, 	percent: 6.37   %
7               mean: 2.75    ms, 	std: 0.48    ms, 	percent: 8.79   %
8               mean: 2.71    ms, 	std: 0.34    ms, 	percent: 8.65   %
9               mean: 2.72    ms, 	std: 0.40    ms, 	percent: 8.68   %
10              mean: 2.72    ms, 	std: 0.33    ms, 	percent: 8.67   %
11              mean: 2.73    ms, 	std: 0.40    ms, 	percent: 8.70   %
Total time: 31.34 ms
