In [1]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [2]:
import torch
import torch.nn as nn

In [3]:
from profiler import Profiler
from nas.models.with_mobilenet import PoseEstimationWithMobileNet

In [5]:
net = PoseEstimationWithMobileNet().cuda()

In [7]:
profiler = Profiler(net.model, "backbone")
for _ in range(100):
    a = torch.randn((8, 3, 256, 256)).cuda()
    profiler.forward(a)
profiler.collect_data()
profiler.show_table()
total_backbone = profiler.total_time

backbone
0               mean: 8.14    ms, 	std: 60.27   ms, 	percent: 13.33  %
1               mean: 6.45    ms, 	std: 1.21    ms, 	percent: 10.57  %
2               mean: 3.92    ms, 	std: 0.85    ms, 	percent: 6.42   %
3               mean: 5.15    ms, 	std: 0.98    ms, 	percent: 8.43   %
4               mean: 2.24    ms, 	std: 0.40    ms, 	percent: 3.66   %
5               mean: 2.82    ms, 	std: 0.65    ms, 	percent: 4.62   %
6               mean: 3.74    ms, 	std: 0.80    ms, 	percent: 6.13   %
7               mean: 5.71    ms, 	std: 1.13    ms, 	percent: 9.36   %
8               mean: 5.71    ms, 	std: 0.98    ms, 	percent: 9.35   %
9               mean: 5.71    ms, 	std: 0.96    ms, 	percent: 9.35   %
10              mean: 5.71    ms, 	std: 0.97    ms, 	percent: 9.35   %
11              mean: 5.76    ms, 	std: 0.96    ms, 	percent: 9.44   %
Total time: 61.05 ms


In [8]:
profiler = Profiler(net.cpm, "cpm")
for _ in range(100):
    a = torch.randn((8, 512, 32, 32)).cuda()
    profiler.forward(a)
profiler.collect_data()
profiler.show_table()
total_cpm = profiler.total_time

cpm
align           mean: 1.22    ms, 	std: 0.12    ms, 	percent: 5.24   %
trunk           mean: 3.23    ms, 	std: 0.34    ms, 	percent: 13.87  %
conv            mean: 18.83   ms, 	std: 5.65    ms, 	percent: 80.89  %
Total time: 23.28 ms


In [9]:
profiler = Profiler(net.initial_stage, "initial_stage")
for _ in range(100):
    a = torch.randn((8, 128, 32, 32)).cuda()
    profiler.forward(a)
profiler.collect_data()
profiler.show_table()
total_init_stage = profiler.total_time

initial_stage
trunk           mean: 54.87   ms, 	std: 0.82    ms, 	percent: 91.82  %
heatmaps        mean: 2.36    ms, 	std: 0.19    ms, 	percent: 3.95   %
pafs            mean: 2.53    ms, 	std: 0.22    ms, 	percent: 4.23   %
Total time: 59.76 ms


In [10]:
profiler = Profiler(net.refinement_stages._modules["0"], "refinement stage")
for _ in range(100):
    a = torch.randn((8, 185, 32, 32)).cuda()
    profiler.forward(a)
profiler.collect_data()
profiler.show_table()
total_refine_stage = profiler.total_time

refinement stage
trunk           mean: 56.58   ms, 	std: 40.42   ms, 	percent: 96.38  %
heatmaps        mean: 1.07    ms, 	std: 0.13    ms, 	percent: 1.82   %
pafs            mean: 1.06    ms, 	std: 0.18    ms, 	percent: 1.80   %
Total time: 58.71 ms


In [11]:
def info(name, block_time, total_time):
    ratio = block_time / total_time
    return f"{name:<20} total: {block_time * 1000:<8.4f} ms, relative: {ratio:.4f}, percent: {ratio * 100:.4f}%"

total = total_backbone + total_cpm + total_init_stage + total_refine_stage
print(f"Total net time: {total * 1000:.4f} ms")
print(info("backbone", total_backbone, total))
print(info("cpm", total_cpm, total))
print(info("initial stage", total_init_stage, total))
print(info("refinement stage", total_refine_stage, total))

Total net time: 202.7953 ms
backbone             total: 61.0480  ms, relative: 0.3010, percent: 30.1033%
cpm                  total: 23.2843  ms, relative: 0.1148, percent: 11.4817%
initial stage        total: 59.7566  ms, relative: 0.2947, percent: 29.4665%
refinement stage     total: 58.7064  ms, relative: 0.2895, percent: 28.9486%


In [12]:
from mobilenetv2 import MobileNetV2

In [13]:
# конфигурация сети
inverted_residual_setting = [
   # t, c,   n, s
    [1, 64,  1, 1],
    [1, 128, 1, 2],
    [1, 128, 1, 1],
    [1, 256, 1, 2],
    [1, 256, 1, 1],
    [1, 512, 6, 1],
]
mnv2 = MobileNetV2(in_channels = 32, last_channels = 512, inverted_residual_setting = inverted_residual_setting).cuda()

In [14]:
mnv1 = PoseEstimationWithMobileNet().model

In [15]:
sum(p.numel() for p in mnv2.parameters()) == sum(p.numel() for p in mnv1.parameters())

True

In [16]:
profiler = Profiler(mnv2.model, "mnv2 backbone")
for _ in range(100):
    a = torch.randn((8, 3, 256, 256)).cuda()
    profiler.forward(a)
profiler.collect_data()
profiler.show_table()
total_mnv2_backbone = profiler.total_time

mnv2 backbone
0               mean: 1.99    ms, 	std: 0.17    ms, 	percent: 2.02   %
1               mean: 10.71   ms, 	std: 0.57    ms, 	percent: 10.84  %
2               mean: 6.50    ms, 	std: 0.35    ms, 	percent: 6.58   %
3               mean: 9.55    ms, 	std: 0.50    ms, 	percent: 9.68   %
4               mean: 3.83    ms, 	std: 0.44    ms, 	percent: 3.88   %
5               mean: 5.17    ms, 	std: 0.57    ms, 	percent: 5.24   %
6               mean: 6.44    ms, 	std: 0.76    ms, 	percent: 6.53   %
7               mean: 10.83   ms, 	std: 0.52    ms, 	percent: 10.97  %
8               mean: 10.91   ms, 	std: 0.51    ms, 	percent: 11.05  %
9               mean: 10.87   ms, 	std: 0.64    ms, 	percent: 11.01  %
10              mean: 10.99   ms, 	std: 1.07    ms, 	percent: 11.13  %
11              mean: 10.94   ms, 	std: 1.08    ms, 	percent: 11.08  %
Total time: 98.73 ms
