In [1]:
from argparse import Namespace
import os
import inspect
import numpy as np

import torch
from torch import nn
from torch import fx
from torch.fx import GraphModule, Graph, Node
from torch.utils.benchmark import Timer

import brt
from brt.runtime import log
from brt.runtime.grid_tensor import GridTensor
from brt.runtime.benchmark import profile
from brt.router import ScatterRouter, GatherRouter, switch_capture
from brt.router.fabric import make_fabric
from brt.router.fabric.base import reset_router_stats
from brt.trace import symbolic_trace, GraphTracer

# from brt.trace.graph import symbolic_trace
from brt.passes import (
    HorizFusePass,
    VerticalFusePass,
    RouterFixPass,
    OperatorReorderPass,
    DeadPathEliminatePass,
    ConstantPropagationPass,
)

# Set the "BRT" logger to WARNING for the benchmark runs; the commented-out
# line below is the DEBUG alternative.
log.set_level("BRT", "WARNING")
# log.set_level("BRT", "DEBUG")

# Debugging aid (disabled): uncomment to set CUDA_LAUNCH_BLOCKING=1.
# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [2]:
import sys
from brt.runtime import BRT_CACHE_PATH

# The MSDNet benchmark sources are not an installed package: make the
# `benchmark/msdnet/` directory (resolved relative to the parent of
# BRT_CACHE_PATH) importable before pulling modules from it.
sys.path.append(str(BRT_CACHE_PATH.parent / "benchmark/msdnet/"))
from msdnet import MSDNet
# NOTE(review): the module name is spelled `theshold_inference`; presumably it
# matches the file name on disk -- verify before "fixing" the spelling.
from theshold_inference import threshold_dynamic_evaluate
from dataloader import get_dataloaders as msdnet_get_dataloaders


In [3]:
# Notebook-wide switches.
# IS_PROFILING guards the `profile(...)` call at the top of each timing cell.
# IS_FUSING_HEAD is forwarded as `fusing_head=` to the fusion passes.
IS_PROFILING = False  # flip to True to enable the profile(...) calls
IS_FUSING_HEAD = True  # flip to False to pass fusing_head=False

In [4]:
# Configuration handed to `MSDNet(...)` and `msdnet_get_dataloaders(...)`.
# Spelled as a plain dict (same keys, same order) and expanded into the
# Namespace in one step.
msdnet_config = {
    "arch": "msdnet",
    "base": 4,
    "batch_size": 256,
    "benchmark": ["all_opt"],
    "bnFactor": [1, 2, 4, 4],
    "bottleneck": True,
    "data": "ImageNet",
    "data_root": "/home/lingji/brainstorm_project/dataset/imagenet",
    "decay_rate": 0.1,
    "epochs": 90,
    "evalmode": "threshold",
    "evaluate_from": "/home/lingji/brainstorm_project/brainstorm/benchmark/msdnet/msdnet-step=4-block=5.pth.tar",
    "gpu": "0,1,2,3",
    "grFactor": [1, 2, 4, 4],
    "growthRate": 16,
    "init_routers": True,
    "lr": 0.1,
    "lr_type": "multistep",
    "momentum": 0.9,
    "nBlocks": 5,
    "nChannels": 32,
    "nScales": 4,
    "num_classes": 1000,
    "optimizer": "sgd",
    "parallel": True,
    "print_freq": 10,
    "prune": "max",
    "reduction": 0.5,
    "resume": False,
    "save": "/home/lingji/brainstorm_project/brainstorm/benchmark/msdnet/saveresult",
    "seed": 0,
    "splits": ["val", "test"],
    "start_epoch": 0,
    "step": 4,
    "stepmode": "even",
    # Alternative threshold settings are kept below; the trailing five numbers
    # on each line are presumably the fraction of samples leaving at each of
    # the five exits -- TODO confirm.
    "thresholds": [-1, -1, -1, -1],                                  # 1.0, 0.0, 0.0, 0.0, 0.0
    # "thresholds": [0.44246858, -1, -1, -1],                          # 0.5, 0.5, 0.0, 0.0, 0.0
    # "thresholds": [0.44246849, 0.26682281, -1, -1],                  # 0.5, 0.3, 0.2, 0.0, 0.0
    # "thresholds": [0.44246864, 0.39881980, 0.19329087, -1],          # 0.5, 0.2, 0.2, 0.1, 0.0
    # "thresholds": [0.96616900, 0.95113075, 0.80969042, 0.45410264],  # 0.1, 0.1, 0.2, 0.3, 0.3
    # "thresholds": [1000, 1000, 0.90728849, 0.57961094],              # 0.0, 0.0, 0.3, 0.3, 0.4
    # "thresholds": [1000, 1000, 1000, 0.83451331],                    # 0.0, 0.0, 0.0, 0.4, 0.6
    # "thresholds": [1000, 1000, 1000, 1000],                          # 0.0, 0.0, 0.0, 0.0, 1.0
    "use_valid": True,
    "weight_decay": 0.0001,
    "workers": 16,
}
args = Namespace(**msdnet_config)

# Number of leading test batches kept on the GPU for calibration and timing,
# and the index of the batch used as the single sample input.
NUM_TEST_BATCHES = 30
SAMPLE_BATCH_INDEX = 13

state_dict = torch.load(
    "/home/lingji/brainstorm_project/brainstorm/benchmark/msdnet/MSDNet.pth"
)
_, val_dataloader, test_dataloader = msdnet_get_dataloaders(args)

# Single pass over the loader that stops as soon as enough batches are
# collected. The previous version walked the loader twice: once only to leave
# batch 13 in `input` (copying every visited batch to the GPU on the way), and
# once more over the *entire* test set even though only the first 30 batches
# were kept.
test_inputs = []
for batch_idx, (test_input, _target) in enumerate(test_dataloader):
    if batch_idx >= NUM_TEST_BATCHES:
        break
    test_inputs.append(test_input.cuda())

# Sample batch consumed by the profiling / pass-construction cells.
# NOTE: `input` shadows the Python builtin; the name is kept because later
# cells refer to it.
input = test_inputs[SAMPLE_BATCH_INDEX]


!!!!!! Load train_set_index !!!!!!


In [5]:
# Instantiate the network, switch it to eval mode, move it to the GPU and
# restore the checkpoint weights. The final expression is left bare so the
# notebook displays the result of `load_state_dict`.
msdnet: nn.Module = MSDNet(args, False)
msdnet.eval()
msdnet.cuda()
msdnet.load_state_dict(state_dict)

building network of steps: 
[4, 4, 4, 4, 4] 20
 ********************** Block 1  **********************
|		inScales 4 outScales 4 inChannels 32 outChannels 16		|

|		inScales 4 outScales 4 inChannels 48 outChannels 16		|

|		inScales 4 outScales 4 inChannels 64 outChannels 16		|

|		inScales 4 outScales 4 inChannels 80 outChannels 16		|

 ********************** Block 2  **********************
|		inScales 4 outScales 4 inChannels 96 outChannels 16		|

|		inScales 4 outScales 3 inChannels 112 outChannels 16		|
|		Transition layer inserted! (max), inChannels 128, outChannels 64	|

|		inScales 3 outScales 3 inChannels 64 outChannels 16		|

|		inScales 3 outScales 3 inChannels 80 outChannels 16		|

 ********************** Block 3  **********************
|		inScales 3 outScales 3 inChannels 96 outChannels 16		|

|		inScales 3 outScales 3 inChannels 112 outChannels 16		|

|		inScales 3 outScales 2 inChannels 128 outChannels 16		|
|		Transition layer inserted! (max), inChannels 144, outChannels

<All keys matched successfully>

In [6]:
def print_load_history(m: nn.Module):
    """Print the fabric statistics of every router contained in ``m``.

    Emits one blank line, then walks all modules of ``m`` (``m`` itself
    included). For each ``ScatterRouter`` or ``GatherRouter`` it prints the
    fabric's ``capturing``, ``load_history``, ``cell_decision_history`` and
    ``cell_grain_history`` attributes, one per line.

    Args:
        m: Module tree to inspect.
    """
    print("")
    router_types = (ScatterRouter, GatherRouter)
    # The loop variable has to stay named `subm`: the `=` f-string specifier
    # echoes the expression text, so the name is part of the printed output.
    for subm in m.modules():
        if not isinstance(subm, router_types):
            continue
        print(f"{subm.fabric.capturing=}")
        print(f"{subm.fabric.load_history=}")
        print(f"{subm.fabric.cell_decision_history=}")
        print(f"{subm.fabric.cell_grain_history=}")

# Clear any previous router statistics, then run every collected batch through
# the model with capturing switched on.
# NOTE(review): the meaning of the "max" and "dispatch,combine" arguments is
# defined by `switch_capture`; not documented here -- see the brt router API.
reset_router_stats(msdnet)
switch_capture(msdnet, True, "max", "dispatch,combine")
for batch in test_inputs:
    test_input = batch.cuda()
    print("*", end="")  # one progress mark per batch
    test_output = msdnet(test_input)
switch_capture(msdnet, False)

# Finish the progress line, then dump what the routers recorded.
print()
print_load_history(msdnet)

# Sample batch used by the profiling and pass-construction cells, followed by
# one more forward pass with capturing off.
input = test_inputs[13]
y = msdnet(input)

******************************

subm.fabric.capturing=False
subm.fabric.load_history=array([1., 0.])
subm.fabric.cell_decision_history=[array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], dtype=int32), array([], dtype=int32)]
subm.fabric.cell_grain_history=[torch.Size([1, 96, 56, 56]), torch.Size([1, 192, 28, 28]), torch.Size([1, 384, 14, 14]), torch.Size([1, 384, 7, 7]), torch.Size([1, 1000])]
subm.fabric.capturing=False
subm.fabric.load_history=array([0., 0.])
subm.fabric.cell_decision_history=[array([], dtype=int32), array([], dtype=int32)]
subm.fabric.cell_grain_history=[torch.Size([0, 192, 28, 28]), torch.Size([0, 384, 14, 14]), torch.Size([0, 384, 7, 7]), torch.Size([0, 384, 7, 7]), torch.Size([0, 1000])]
subm.fabric.capturing=False
subm.fabric.load_history=array([0., 0.])
subm.fabric.cell_decision_history=[array([], dtype=int32), array([], dtype=int32)]
subm.fabric.cell_grain_history=[torch.Size([

In [7]:
# Optional one-off profile of the unmodified model on the sample batch.
if IS_PROFILING:
    profile(lambda: msdnet(input))

# Mean latency of the unmodified model for each batch: 10 timed runs per
# batch, with `Measurement.mean` scaled by 1e6 (seconds -> microseconds
# according to the torch benchmark docs).
raw_time = []
for test_input in test_inputs:
    timer = Timer(
        stmt="model(x)",
        setup="import torch; torch.cuda.synchronize()",
        globals={"model": msdnet, "x": test_input},
    )
    measurement = timer.timeit(10)
    cost = measurement.mean * 1e6
    print(cost)
    raw_time.append(cost)

34707.10220281035
34967.394499108195
35038.598300889134
34396.329685114324
34467.058489099145
34382.93789513409
34407.096705399454
36613.95909730345
34850.78550875187
35570.16709819436
34277.397696860135
34296.60592228174
34384.08358488232
34353.47559861839
34323.31420481205
34362.24730685353
34460.418904200196
34738.81690297276
34730.76689988375
34468.53049099445
34424.160909838974
34474.59260933101
34366.821707226336
34419.09612156451
34369.03508845717
34378.65050509572
34319.668589159846
34515.56800864637
34578.92008591443
34552.52698622644


In [8]:
# Vertical fusion applied directly to the unmodified model, using the sample
# batch as the `x` input.
verti_fusion_pass = VerticalFusePass(
    msdnet,
    sample_inputs={"x": input},
    fusing_head=IS_FUSING_HEAD,
)
verti_fusion_pass.run_on_graph()
msdnet_vf = verti_fusion_pass.finalize()

router.fabric_type='dispatch'
router.fabric.supported_capacities=None
router.fabric.capacity_padding=False
router.fabric.path_wise_padding=False
router.fabric_type='dispatch'
router.fabric.supported_capacities=None
router.fabric.capacity_padding=False
router.fabric.path_wise_padding=False
router.fabric_type='dispatch'
router.fabric.supported_capacities=None
router.fabric.capacity_padding=False
router.fabric.path_wise_padding=False
router.fabric_type='dispatch'
router.fabric.supported_capacities=None
router.fabric.capacity_padding=False
router.fabric.path_wise_padding=False
Found fixed routers: dict_values([])
Currently not support nodes with kwargs (`cat`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_1`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_2`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_3`), the info of kwargs won't be traced
Currently not support nodes with kwargs (

In [9]:
# Show the graph of the vertically fused model produced by the pass above.
print(msdnet_vf.graph)

graph():
    %x : [#users=1] = placeholder[target=x] | fixed
    %_is_measure : [#users=0] = placeholder[target=_is_measure](default=False) | unfixed
    %annotator : [#users=6] = call_module[target=annotator](args = (%x,), kwargs = {}) | fixed
    %to_torch_tensor : [#users=1] = call_function[target=brt.runtime.grid_tensor.to_torch_tensor](args = (%annotator, False), kwargs = {}) | fixed
    %blocks_0_0_layers_0_0 : [#users=1] = call_module[target=blocks.0.0.layers.0.0](args = (%to_torch_tensor,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_1 : [#users=1] = call_module[target=blocks.0.0.layers.0.1](args = (%blocks_0_0_layers_0_0,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_2 : [#users=1] = call_module[target=blocks.0.0.layers.0.2](args = (%blocks_0_0_layers_0_1,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_3 : [#users=4] = call_module[target=blocks.0.0.layers.0.3](args = (%blocks_0_0_layers_0_2,), kwargs = {}) | fixed
    %BRT_VF__blocks_0_0_layers_1_net_0__blocks_0_0_layers_

In [10]:
# Optional one-off profile of the vertically fused model on the sample batch.
if IS_PROFILING:
    profile(lambda: msdnet_vf(input))

# Mean latency of the vertically fused model for each batch: one untimed call
# first, then 10 timed runs, with `Measurement.mean` scaled by 1e6
# (seconds -> microseconds according to the torch benchmark docs).
vf_time = []
for test_input in test_inputs:
    msdnet_vf(test_input)  # untimed call before the measurement
    timer = Timer(
        stmt="model(x)",
        setup="import torch; torch.cuda.synchronize()",
        globals={"model": msdnet_vf, "x": test_input},
    )
    measurement = timer.timeit(10)
    cost = measurement.mean * 1e6
    print(cost)
    vf_time.append(cost)

13003.325113095343
20040.6655902043
20918.552996590734
20950.643508695066
20889.428607188165
13632.658310234547
13174.506393261254
13095.321995206177
12643.920700065792
13483.439502306283
13440.239778719842
12746.740784496069
12690.209900029004
12688.28350584954
12710.493500344455
12983.078393153846
13297.378597781062
13075.81348810345
12771.130702458322
12701.257993467152
13062.49089539051
12988.0805965513
12713.611009530723
13104.03801035136
12973.765493370593
13018.198101781309
13248.107978142798
13484.35280378908
13419.973198324442
12758.08159261942


In [11]:
# Print the vertically fused model's graph again, this time after the timing
# loop has executed the model.
print(msdnet_vf.graph)

graph():
    %x : [#users=1] = placeholder[target=x] | fixed
    %_is_measure : [#users=0] = placeholder[target=_is_measure](default=False) | unfixed
    %annotator : [#users=6] = call_module[target=annotator](args = (%x,), kwargs = {}) | fixed
    %to_torch_tensor : [#users=1] = call_function[target=brt.runtime.grid_tensor.to_torch_tensor](args = (%annotator, False), kwargs = {}) | fixed
    %blocks_0_0_layers_0_0 : [#users=1] = call_module[target=blocks.0.0.layers.0.0](args = (%to_torch_tensor,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_1 : [#users=1] = call_module[target=blocks.0.0.layers.0.1](args = (%blocks_0_0_layers_0_0,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_2 : [#users=1] = call_module[target=blocks.0.0.layers.0.2](args = (%blocks_0_0_layers_0_1,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_3 : [#users=4] = call_module[target=blocks.0.0.layers.0.3](args = (%blocks_0_0_layers_0_2,), kwargs = {}) | fixed
    %BRT_VF__blocks_0_0_layers_1_net_0__blocks_0_0_layers_

In [12]:
# Pass pipeline: router fix -> dead-path elimination -> constant propagation
# -> operator reorder. Every pass follows the same protocol: construct it on a
# module, call `run_on_graph()`, then `finalize()` to obtain the new module.

# NOTE(review): `msdnet_rf` was consumed below without being defined anywhere
# in this notebook, so the cell failed with NameError on a fresh kernel.
# `RouterFixPass` is imported at the top but was otherwise unused, so the
# missing step is reconstructed here on the raw model with default arguments.
# Confirm that these are the constructor arguments the original run used.
router_fix_pass = RouterFixPass(msdnet)
router_fix_pass.run_on_graph()
msdnet_rf = router_fix_pass.finalize()

eliminate_pass = DeadPathEliminatePass(msdnet_rf)
eliminate_pass.run_on_graph()
msdnet_dpe = eliminate_pass.finalize()

constant_propagation_pass = ConstantPropagationPass(
    msdnet_dpe, upper_perm_load=1
)
constant_propagation_pass.run_on_graph()
msdnet_cp = constant_propagation_pass.finalize()

operator_reorder_pass = OperatorReorderPass(msdnet_cp, False)
operator_reorder_pass.run_on_graph()
msdnet_reorder = operator_reorder_pass.finalize()

print(msdnet_reorder.graph)


graph():
    %x : [#users=1] = placeholder[target=x]
    %_is_measure : [#users=0] = placeholder[target=_is_measure](default=False)
    %annotator : [#users=1] = call_module[target=annotator](args = (%x,), kwargs = {})
    %blocks_0_0_layers_0_0 : [#users=1] = call_module[target=blocks.0.0.layers.0.0](args = (%annotator,), kwargs = {})
    %blocks_0_0_layers_0_1 : [#users=1] = call_module[target=blocks.0.0.layers.0.1](args = (%blocks_0_0_layers_0_0,), kwargs = {})
    %blocks_0_0_layers_0_2 : [#users=1] = call_module[target=blocks.0.0.layers.0.2](args = (%blocks_0_0_layers_0_1,), kwargs = {})
    %blocks_0_0_layers_0_3 : [#users=4] = call_module[target=blocks.0.0.layers.0.3](args = (%blocks_0_0_layers_0_2,), kwargs = {})
    %blocks_0_0_layers_1_net_0 : [#users=1] = call_module[target=blocks.0.0.layers.1.net.0](args = (%blocks_0_0_layers_0_3,), kwargs = {})
    %blocks_0_1_layers_0_conv_normal_net_0 : [#users=1] = call_module[target=blocks.0.1.layers.0.conv_normal.net.0](args = (%block

In [13]:
# Optional one-off profile of the reordered model on the sample batch.
if IS_PROFILING:
    profile(lambda: msdnet_reorder(input))

# Mean latency of the reordered model for each batch: 10 timed runs per batch,
# with `Measurement.mean` scaled by 1e6 (seconds -> microseconds according to
# the torch benchmark docs).
opr_time = []
for test_input in test_inputs:
    timer = Timer(
        stmt="model(x)",
        setup="import torch; torch.cuda.synchronize()",
        globals={"model": msdnet_reorder, "x": test_input},
    )
    measurement = timer.timeit(10)
    cost = measurement.mean * 1e6
    print(cost)
    opr_time.append(cost)

17972.885281778872
18102.532299235463
18074.400699697435
18091.868003830314
18047.10349533707
18117.3802819103
18072.980316355824
18101.92228294909
18104.204488918185
17988.10891341418
18238.099105656147
18099.0070104599
17997.803911566734
18144.265306182206
18155.12788016349
18136.757100000978
16784.841986373067
11075.658584013581
10726.630198769271
10700.106294825673
10734.227579087019
10806.504706852138
11143.650184385478
10637.439298443496
10700.103896670043
10653.313482180238
10554.43489458412
10945.09451650083
10516.008990816772
10693.304194137454


In [14]:
# Vertical fusion applied to the reordered model (the output of the pass
# pipeline), using the sample batch as the `x` input.
sp_verti_fusion_pass = VerticalFusePass(
    msdnet_reorder,
    sample_inputs={"x": input},
    fusing_head=IS_FUSING_HEAD,
)
sp_verti_fusion_pass.run_on_graph()
msdnet_sp = sp_verti_fusion_pass.finalize()

Found fixed routers: dict_values([])
Currently not support nodes with kwargs (`cat`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_1`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_2`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_3`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_4`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_5`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_6`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_7`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_8`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_9`), the info of kwargs won't be traced
start node `x` should be a fixed module node
start node `_is_measure` should be a fixed module node
can't

In [15]:
# Optional one-off profile of the reordered + vertically fused model.
if IS_PROFILING:
    profile(lambda: msdnet_sp(input))

# Mean latency of the reordered + vertically fused model for each batch:
# 10 timed runs per batch, with `Measurement.mean` scaled by 1e6
# (seconds -> microseconds according to the torch benchmark docs).
sp_time = []
for test_input in test_inputs:
    timer = Timer(
        stmt="model(x)",
        setup="import torch; torch.cuda.synchronize()",
        globals={"model": msdnet_sp, "x": test_input},
    )
    measurement = timer.timeit(10)
    cost = measurement.mean * 1e6
    print(cost)
    sp_time.append(cost)

1227.9876042157412
1366.3239078596234
1360.2469116449356
1356.9823931902647
1363.0061177536845
1365.3353787958622
1355.602010153234
1357.7269157394767
1363.4052127599716
1355.9928862378001
1361.5948846563697
1546.1043920367956
1363.610103726387
1358.5980981588364
1362.3852981254458
1355.3321128711104
1363.8186967000365
1358.046499080956
1359.802414663136
1775.8667934685946
1362.0713027194142
1357.191801071167
1408.2239009439945
1364.739891141653
1361.9158184155822
1351.7056126147509
1364.5501108840108
1353.4005032852292
1358.1337872892618
1359.7961980849504


In [16]:
# Show the graph of the reordered model after vertical fusion.
print(msdnet_sp.graph)

graph():
    %x : [#users=1] = placeholder[target=x] | fixed
    %_is_measure : [#users=0] = placeholder[target=_is_measure](default=False) | unfixed
    %annotator : [#users=1] = call_module[target=annotator](args = (%x,), kwargs = {}) | fixed
    %to_torch_tensor : [#users=1] = call_function[target=brt.runtime.grid_tensor.to_torch_tensor](args = (%annotator, False), kwargs = {}) | fixed
    %blocks_0_0_layers_0_0 : [#users=1] = call_module[target=blocks.0.0.layers.0.0](args = (%to_torch_tensor,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_1 : [#users=1] = call_module[target=blocks.0.0.layers.0.1](args = (%blocks_0_0_layers_0_0,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_2 : [#users=1] = call_module[target=blocks.0.0.layers.0.2](args = (%blocks_0_0_layers_0_1,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_3 : [#users=4] = call_module[target=blocks.0.0.layers.0.3](args = (%blocks_0_0_layers_0_2,), kwargs = {}) | fixed
    %BRT_VF__blocks_0_0_layers_1_net_0__blocks_0_0_layers_

In [17]:
# Horizontal fusion applied to the reordered model (not to the vertically
# fused one), using the sample batch as the `x` input.
horiz_fusion_pass = HorizFusePass(
    msdnet_reorder,
    sample_inputs={"x": input},
    fusing_head=IS_FUSING_HEAD,
)
horiz_fusion_pass.run_on_graph()
msdnet_hf = horiz_fusion_pass.finalize()

Found fixed routers: dict_values([])
Currently not support nodes with kwargs (`cat`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_1`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_2`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_3`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_4`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_5`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_6`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_7`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_8`), the info of kwargs won't be traced
Currently not support nodes with kwargs (`cat_9`), the info of kwargs won't be traced
can't find jit module
At branch 0, node `annotator` is not vfusable
No nodes are h-fusable, continue
fuse

In [18]:
# Optional one-off profile of the horizontally fused model.
if IS_PROFILING:
    profile(lambda: msdnet_hf(input))

# Mean latency of the horizontally fused model for each batch. This cell uses
# 100 timed runs per batch (the other timing cells use 10); `Measurement.mean`
# is scaled by 1e6 (seconds -> microseconds according to the torch benchmark
# docs).
hf_time = []
for test_input in test_inputs:
    timer = Timer(
        stmt="model(x)",
        setup="import torch; torch.cuda.synchronize()",
        globals={"model": msdnet_hf, "x": test_input},
    )
    measurement = timer.timeit(100)
    cost = measurement.mean * 1e6
    print(cost)
    hf_time.append(cost)

832.4467809870839
905.2827511914074
948.4484908170998
897.4875696003437
868.2441199198365
856.3094213604927
917.7400497719646
914.5494597032666
890.0095312856138
869.8760904371738
880.736829712987
899.597171228379
896.8167495913804
858.8446606881917
860.9298220835626
908.6570702493191
848.5695905983448
879.8141498118639
868.9335593953729
894.6310402825475
858.5758809931576
874.3663295172155
855.4232213646173
876.8099080771208
873.370619956404
853.3562789671123
852.8430620208383
870.1311913318932
858.8945819064975
887.8164296038449


In [19]:
# Show the graph of the reordered model after horizontal fusion.
print(msdnet_hf.graph)

graph():
    %x : [#users=1] = placeholder[target=x] | fixed
    %_is_measure : [#users=0] = placeholder[target=_is_measure](default=False) | unfixed
    %annotator : [#users=1] = call_module[target=annotator](args = (%x,), kwargs = {}) | fixed
    %to_torch_tensor : [#users=1] = call_function[target=brt.runtime.grid_tensor.to_torch_tensor](args = (%annotator, False), kwargs = {}) | fixed
    %blocks_0_0_layers_0_0 : [#users=1] = call_module[target=blocks.0.0.layers.0.0](args = (%to_torch_tensor,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_1 : [#users=1] = call_module[target=blocks.0.0.layers.0.1](args = (%blocks_0_0_layers_0_0,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_2 : [#users=1] = call_module[target=blocks.0.0.layers.0.2](args = (%blocks_0_0_layers_0_1,), kwargs = {}) | fixed
    %blocks_0_0_layers_0_3 : [#users=2] = call_module[target=blocks.0.0.layers.0.3](args = (%blocks_0_0_layers_0_2,), kwargs = {}) | fixed
    %BRT_HF__V_blocks_0_1_layers_0_conv_normal_net_0__bloc

In [20]:
# Per-batch latencies as tensors so that the ratios are computed element-wise.
t_raw_time = torch.tensor(raw_time)
t_vf_time = torch.tensor(vf_time)
t_sp_time = torch.tensor(sp_time)
t_hf_time = torch.tensor(hf_time)

# Speed-up of each optimized variant over the raw model, batch by batch.
vf_speed_up = t_raw_time / t_vf_time
sp_speed_up = t_raw_time / t_sp_time
hf_speed_up = t_raw_time / t_hf_time

# Report the speed-ups. The previous code computed the ratios above but then
# printed the raw latencies, which does not match the recorded output of this
# cell (values around 2.5 / 25 / 39 are raw/optimized ratios, not times).
speed_ups = {"vf": vf_speed_up, "sp": sp_speed_up, "hf": hf_speed_up}
for stat in ("mean", "max", "min"):
    print(stat)
    for tag, ratio in speed_ups.items():
        # Formatting a 0-dim tensor in an f-string prints its Python scalar.
        print(f"{tag}: {getattr(ratio, stat)()}")

mean
vf: 2.5245282649993896
sp: 25.209041595458984
hf: 39.41164779663086
max
vf: 2.795957326889038
sp: 28.263399124145508
hf: 41.69287872314453
min
vf: 1.6417791843414307
sp: 19.409412384033203
hf: 36.94306945800781


In [21]:
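# Recorded results for thresholds = [1, -1, -1, -1]
# (presumably speed-ups over the raw model from an earlier run -- TODO confirm)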
# mean
# vf: 1.2304408550262451
# sp: 11.934501647949219
# hf: 13.882946014404297
# max
# vf: 1.3284677267074585
# sp: 13.092482566833496
# hf: 15.02823257446289
# min
# vf: 1.1286181211471558
# sp: 11.222148895263672
# hf: 13.1246976852417

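# Recorded results for thresholds = [-1, -1, -1, -1]
# (presumably speed-ups over the raw model from an earlier run -- TODO confirm)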
# mean
# vf: 1.363097071647644
# sp: 5.163495063781738
# hf: 8.713125228881836
# max
# vf: 1.4690160751342773
# sp: 6.095772743225098
# hf: 9.53192138671875
# min
# vf: 1.2727227210998535
# sp: 4.845872402191162
# hf: 8.095858573913574