# Exporting PyTorch Models

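This notebook exports a DeepLabCut PyTorch pose model to ONNX, checks the exported graph, and runs it with ONNX Runtime. Useful references on export and deployment options: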

- [ExecuTorch Runtime Overview](https://pytorch.org/executorch/stable/runtime-overview.html)
- [Deploying Torch-TensorRT Programs](https://pytorch.org/TensorRT/tutorials/runtime.html)
- [`torch.onnx`](https://pytorch.org/docs/stable/onnx.html)
- [TorchDynamo-based ONNX Exporter](https://pytorch.org/docs/stable/onnx_dynamo.html#torchdynamo-based-onnx-exporter)
  - This exporter might be more difficult to get working.
- [TorchScript-based ONNX Exporter](https://pytorch.org/docs/stable/onnx_torchscript.html)
  - This is the exporter I used to export the model in this notebook.

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path

import torch
import deeplabcut as dlc

from deeplabcut.pose_estimation_pytorch.config import read_config_as_dict, pretty_print
from deeplabcut.pose_estimation_pytorch.models import PoseModel

Loading DLC 3.0.0rc4...
DLC loaded in light mode; you cannot use any GUI (labeling, relabeling and standalone GUI)


  from .autonotebook import tqdm as notebook_tqdm


In [17]:
import os

# Directory holding the trained DeepLabCut PyTorch model. It must contain
# `pytorch_config.yaml` and the snapshot file named below.
# Set the DLC_MODEL_DIR / DLC_SNAPSHOT environment variables to point at a
# different model (e.g. another training folder and "snapshot-200.pt") instead
# of editing hard-coded absolute paths in this cell.
root = Path(os.environ.get("DLC_MODEL_DIR", "/media1/data/dikra/fly-kevin"))
model_cfg = read_config_as_dict(root / "pytorch_config.yaml")
weights_path = root / os.environ.get("DLC_SNAPSHOT", "snapshot-100.pt")

# Display the model section of the config (backbone and heads) for reference.
pretty_print(model_cfg["model"])

backbone:
  type: ResNet
  model_name: resnet50_gn
  output_stride: 16
  freeze_bn_stats: True
  freeze_bn_weights: False
backbone_output_channels: 2048
heads:
  bodypart:
    type: HeatmapHead
    weight_init: normal
    predictor:
      type: HeatmapPredictor
      apply_sigmoid: False
      clip_scores: True
      location_refinement: True
      locref_std: 7.2801
    target_generator:
      type: HeatmapGaussianGenerator
      num_heatmaps: 25
      pos_dist_thresh: 17
      heatmap_mode: KEYPOINT
      generate_locref: True
      locref_std: 7.2801
    criterion:
      heatmap:
        type: WeightedMSECriterion
        weight: 1.0
      locref:
        type: WeightedHuberCriterion
        weight: 0.05
    heatmap_config:
      channels: [2048, 25]
      kernel_size: [3]
      strides: [2]
    locref_config:
      channels: [2048, 50]
      kernel_size: [3]
      strides: [2]


In [18]:
# Build the architecture described by the config, then restore the trained weights.
model = PoseModel.build(model_cfg["model"])
# Map the checkpoint to the CPU: the later cells feed this model CPU tensors,
# and mapping to "cuda" makes this cell fail on machines without a GPU.
# NOTE: torch.load unpickles the file, so only load snapshots from a trusted source.
weights = torch.load(weights_path, map_location="cpu")
# The state dict is stored under the 'model' key of the snapshot.
model.load_state_dict(weights['model'])


  weights = torch.load(weights_path, map_location="cuda")


<All keys matched successfully>

In [4]:
# Uncomment to install/upgrade the ONNX packages in the kernel's environment:
# %pip install --upgrade onnx onnxscript


In [19]:
# Sanity check: run the PyTorch model once on a constant dummy image and decode
# the raw head outputs into pose predictions.
model.eval()
# Inference only, so skip building the autograd graph.
with torch.no_grad():
    outputs = model(torch.ones((1, 3, 128, 128)))
    predictions = model.get_predictions(outputs)

print(predictions)

{'bodypart': {'poses': tensor([[[[2.9188e+01, 1.0731e+02, 1.1435e-02],
          [2.8747e+01, 9.9442e+01, 2.0099e-02],
          [1.1714e+02, 1.0701e+02, 6.9109e-03],
          [3.6253e+01, 1.0663e+02, 1.1917e-02],
          [2.9835e+01, 1.0776e+02, 1.3606e-02],
          [2.8415e+01, 1.0824e+02, 1.2388e-02],
          [9.3935e+01, 1.2450e+02, 4.0271e-03],
          [1.0160e+02, 9.9875e+01, 1.6600e-02],
          [3.6507e+01, 1.0833e+02, 1.4177e-02],
          [3.7340e+01, 1.0862e+02, 9.2899e-03],
          [2.8216e+01, 1.0749e+02, 1.4782e-02],
          [2.9113e+01, 9.9871e+01, 2.7031e-02],
          [1.0144e+02, 2.7210e+01, 1.2358e-02],
          [2.9196e+01, 1.0035e+02, 8.1381e-03],
          [4.2969e+00, 1.0004e+02, 6.5386e-03],
          [1.9925e+01, 1.1744e+01, 3.8608e-03],
          [1.0198e+02, 1.0771e+02, 7.8444e-03],
          [2.8273e+01, 2.8349e+01, 1.9638e-02],
          [2.7892e+01, 1.1740e+01, 9.5909e-03],
          [2.9514e+01, 1.0792e+02, 6.3733e-02],
          [1.1755

In [20]:
# Example input used to trace the model. Its shape (batch of 8 images,
# 3 channels, 640x480) is recorded as the fixed input shape of the exported graph.
dummy_input = torch.zeros((8, 3, 640, 480))

torch.onnx.export(
    model,
    dummy_input,
    root / "resnet.onnx",
    # Pin the input tensor name explicitly. The ONNX Runtime cell feeds the
    # model under the key "input.1", which was previously just the exporter's
    # auto-generated name and is not guaranteed to stay the same.
    input_names=["input.1"],
    verbose=False,
)

In [21]:
import onnx

# Read the exported file back, validate it, then dump the graph as text.
onnx_model = onnx.load(root / "resnet.onnx")

# Raises if the model is not well formed.
onnx.checker.check_model(onnx_model)

# Human-readable listing of the graph's inputs, initializers and nodes.
graph_text = onnx.helper.printable_graph(onnx_model.graph)
print(graph_text)

graph main_graph (
  %input.1[FLOAT, 8x3x640x480]
) initializers (
  %backbone.model.conv1.weight[FLOAT, 64x3x7x7]
  %backbone.model.layer1.0.conv1.weight[FLOAT, 64x64x1x1]
  %backbone.model.layer1.0.conv2.weight[FLOAT, 64x64x3x3]
  %backbone.model.layer1.0.conv3.weight[FLOAT, 256x64x1x1]
  %backbone.model.layer1.0.downsample.0.weight[FLOAT, 256x64x1x1]
  %backbone.model.layer1.1.conv1.weight[FLOAT, 64x256x1x1]
  %backbone.model.layer1.1.conv2.weight[FLOAT, 64x64x3x3]
  %backbone.model.layer1.1.conv3.weight[FLOAT, 256x64x1x1]
  %backbone.model.layer1.2.conv1.weight[FLOAT, 64x256x1x1]
  %backbone.model.layer1.2.conv2.weight[FLOAT, 64x64x3x3]
  %backbone.model.layer1.2.conv3.weight[FLOAT, 256x64x1x1]
  %backbone.model.layer2.0.conv1.weight[FLOAT, 128x256x1x1]
  %backbone.model.layer2.0.conv2.weight[FLOAT, 128x128x3x3]
  %backbone.model.layer2.0.conv3.weight[FLOAT, 512x128x1x1]
  %backbone.model.layer2.0.downsample.0.weight[FLOAT, 512x256x1x1]
  %backbone.model.layer2.1.conv1.weight[FLOAT

In [None]:
# Uncomment to install ONNX Runtime in the kernel's environment:
# %pip install onnxruntime

In [28]:
import onnxruntime as ort
import numpy as np
from dlclive.pose import argmax_pose_predict, extract_cnn_output, multi_pose_predict


# Pass the model path as a plain string; not every onnxruntime release accepts
# pathlib.Path objects here.
ort_session = ort.InferenceSession(str(root / "resnet.onnx"))

# Ask the session for its input name instead of hard-coding the exporter's
# auto-generated "input.1", so this cell keeps working if the export names change.
input_name = ort_session.get_inputs()[0].name

# Feed a random float32 batch with the shape the model was exported with.
# The feed values must be numpy arrays. Passing None requests all model outputs.
outputs = ort_session.run(
    None,
    {input_name: np.random.randn(8, 3, 640, 480).astype(np.float32)},
)

RuntimeError: Input must be a list of dictionaries or a single numpy array for input 'input.1'.

In [25]:
# Show the dimensions of the test image.
# NOTE(review): `img` is only assigned by the cv2.imread call in a later cell, so
# on a fresh kernel run top-to-bottom this cell raises NameError.
img.shape

(540, 682, 3)

In [11]:
from dlclive import DLCLive, Processor
import dlclive
from dlclive.display import Display
import cv2
import numpy as np 
from dlclive.pose import argmax_pose_predict
from dlclive.predictor import HeatmapPredictor

In [22]:
dlc_proc = Processor()
# Dikra's model: reuse `root` and `weights_path` from the configuration cell
# rather than repeating the same absolute paths as string literals.
dlc_live = DLCLive(pytorch_cfg=str(root), processor=dlc_proc, snapshot=str(weights_path))

# Test image stored next to the model files.
img_path = root / "img001.png"
img = cv2.imread(str(img_path))
# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than later on `img.shape`.
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

In [53]:
# NOTE(review): the next lines re-create the Processor, the DLCLive instance and
# the test image that an earlier setup cell already creates.
dlc_proc = Processor()
#Dikra
dlc_live = DLCLive(pytorch_cfg="/media1/data/dikra/fly-kevin", processor=dlc_proc, snapshot='/media1/data/dikra/fly-kevin/snapshot-100.pt')
img = cv2.imread("/media1/data/dikra/fly-kevin/img001.png")
# Presumably splits the raw network outputs into a score map and a
# location-refinement map (inferred from the names) — verify against dlclive.pose.
# NOTE(review): `outputs` is assigned both by the PyTorch forward-pass cell and by
# the ONNX Runtime cell; which value is used here depends on execution order.
scmap, locref = extract_cnn_output(outputs, dlc_live.cfg) 
print(locref.shape,scmap.shape)
print(dlc_live.cfg)
# NOTE(review): the recorded output of this cell ends in
# "ValueError: all the input arrays must have same number of dimensions";
# presumably raised by the call below — TODO confirm and fix the input shapes.
# Open question from the author: which stride should be passed here? The
# backbone's output_stride from the config is used for now.
pose = argmax_pose_predict(scmap, locref, dlc_live.cfg["model"]["backbone"]["output_stride"]) # !! Which stride to use !!
# pose = multi_pose_predict(scmap, locref, stride=dlc_live.cfg["model"]["backbone"]["output_stride"])

(50, 81, 61) (25, 81, 61)
(81, 61, 25, 2) (25, 81, 61)
{'Task': None, 'scorer': None, 'date': None, 'multianimalproject': None, 'identity': None, 'project_path': '/media/dikra/ADATA HD650/PhD/DATA/DLC24_Data/Fly-Kevin-2019-03-16-1001/fly-kevin-2019-03-16/Fly-Kevin-2019-03-16/dlc-models-pytorch/iteration-1/FlyFeb19-trainset80shuffle1001/train', 'engine': 'tensorflow', 'video_sets': None, 'bodyparts': None, 'start': None, 'stop': None, 'numframes2pick': None, 'skeleton': [], 'skeleton_color': 'black', 'pcutoff': None, 'dotsize': None, 'alphavalue': None, 'colormap': None, 'TrainingFraction': None, 'iteration': None, 'default_net_type': None, 'default_augmenter': None, 'snapshotindex': None, 'detector_snapshotindex': None, 'batch_size': None, 'cropping': None, 'x1': None, 'x2': None, 'y1': None, 'y2': None, 'corner2move2': None, 'move2corner': None, 'SuperAnimalConversionTables': None, 'data': {'colormode': 'RGB', 'inference': {'normalize_images': True}, 'train': {'affine': {'p': 0.5, 'ro

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 4 dimension(s) and the array at index 1 has 1 dimension(s)

In [12]:
# Wrap the first two entries of `outputs` as tensors, keyed by head output name.
outputs_dict = {
    'heatmap': torch.Tensor(outputs[0]),
    'locref': torch.Tensor(outputs[1]),
}

In [13]:
# Value range (max, min) of the location-refinement tensor.
locref_out = outputs_dict['locref']
(locref_out.max(), locref_out.min())

(tensor(1.9083), tensor(-1.8802))

In [16]:
# Stride handed to the predictor: the backbone's output stride from the config, as a float.
output_stride = float(dlc_live.cfg["model"]["backbone"]["output_stride"])

# NOTE(review): the predictor is constructed with its default arguments, not with
# the predictor settings from the model config — confirm the defaults match.
pred = HeatmapPredictor()
pred(stride=output_stride, outputs=outputs_dict)

16.0
torch.Size([8, 25, 81, 61])
torch.Size([8, 81, 61, 25])
torch.Size([8, 50, 81, 61])
torch.Size([8, 81, 61, 25, 2])
tensor([[58, 17, 12, 17, 17, 44, 18, 44, 45, 45, 57, 47, 15, 14, 14, 46, 12, 57,
         14, 13, 56, 16, 18, 46, 44],
        [68, 60, 59, 29, 42, 57, 40, 62, 50, 19, 61, 56,  3, 32, 20, 60, 42, 67,
         60, 19, 18, 61, 70, 68, 57],
        [60, 58, 60, 61, 60, 73, 61, 63, 60, 60, 59, 71, 59, 60, 60, 62, 61,  5,
         57, 57, 61, 60, 60, 57, 64],
        [22, 24, 25, 23, 24, 27, 22, 25, 22,  1, 28, 44, 25, 23, 26, 24, 63, 25,
         23, 23, 25, 25, 22, 23, 22],
        [58, 68, 66, 68, 55, 29, 48, 61, 66, 65, 49, 47, 29, 66, 68, 66, 29, 57,
         73, 65, 56, 49, 55, 66, 68],
        [20, 20, 18, 37, 60,  3, 58, 38, 36, 37, 39, 62, 63, 39, 39, 59, 59, 61,
         62, 59, 37, 59, 58, 19,  3],
        [61,  4, 61,  5,  4, 68, 23, 10,  2,  7,  7, 11,  3, 59, 11, 69,  1, 11,
         38, 21,  3, 61,  0,  2,  3],
        [36,  1, 54, 55, 59, 49, 56, 34, 57, 55

{'poses': tensor([[[[7.1616e+02, 9.3678e+02, 5.0798e-01],
           [4.1300e+02, 2.7912e+02, 5.1097e-01],
           [3.9530e+02, 1.9977e+02, 5.1294e-01],
           [4.1129e+02, 2.7842e+02, 5.1868e-01],
           [4.2444e+02, 2.8225e+02, 5.1547e-01],
           [4.4138e+01, 7.1348e+02, 5.0646e-01],
           [4.4352e+02, 2.9568e+02, 5.1010e-01],
           [7.3009e+01, 7.1154e+02, 5.1061e-01],
           [9.1852e+01, 7.2936e+02, 5.1520e-01],
           [9.0168e+01, 7.2719e+02, 5.1306e-01],
           [7.1452e+02, 9.1865e+02, 5.0698e-01],
           [1.2120e+02, 7.5922e+02, 5.1051e-01],
           [3.8233e+02, 2.4553e+02, 5.0642e-01],
           [4.2928e+02, 2.3481e+02, 5.1082e-01],
           [4.0877e+02, 2.3412e+02, 5.0658e-01],
           [1.0499e+02, 7.4385e+02, 5.5394e-01],
           [3.8251e+02, 2.0425e+02, 5.4185e-01],
           [7.2263e+02, 9.1759e+02, 5.1090e-01],
           [3.8084e+02, 2.3207e+02, 5.2980e-01],
           [4.0534e+02, 2.1603e+02, 5.4777e-01],
           

In [61]:
# Inspect the `locref` array assigned from extract_cnn_output in an earlier cell.
# NOTE(review): the recorded values are on the order of 1e15-1e17, whereas the
# recorded min/max of outputs_dict['locref'] is roughly +/-1.9 — worth checking
# how this array was scaled.
locref

array([[[[ 6.81833145e+15, -8.50080114e+16],
         [-1.55025556e+17,  2.10278335e+17],
         [ 1.12748673e+17,  1.17520682e+17],
         ...,
         [ 1.14780445e+16,  3.57216854e+16],
         [ 2.70412601e+16,  5.35883027e+16],
         [ 7.31692776e+16,  1.18435476e+17]],

        [[ 4.33750508e+16,  8.52288843e+16],
         [ 4.41673563e+16,  6.90622967e+16],
         [ 5.86081402e+16,  1.13206565e+16],
         ...,
         [ 1.38018439e+17,  1.41507387e+17],
         [ 1.42513200e+17,  1.65256787e+17],
         [ 1.89650534e+17,  1.62525909e+17]],

        [[ 1.20594367e+17,  1.04709842e+17],
         [ 9.72268927e+16,  6.39527073e+16],
         [ 4.90613728e+16,  4.88650584e+16],
         ...,
         [ 2.99526999e+17,  3.11096576e+17],
         [ 3.33721742e+17,  3.17000197e+17],
         [ 4.27869487e+17,  2.22488239e+17]],

        ...,

        [[ 1.90879290e+17,  2.34439278e+17],
         [ 1.26436330e+17,  1.51480885e+17],
         [ 1.54094596e+17,  1.26758177

In [155]:
# Look up the `locref_std` value configured for the bodypart head's predictor.
dlc_live.cfg["model"]["heads"]["bodypart"]["predictor"]["locref_std"]

7.2801