# DLC Live PyTorch Demo

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from dlclive import DLCLive
import cv2
import numpy as np
from pathlib import Path
import time
from onnxruntime import quantization
import onnx

In [None]:
# Names of the example DLC project folders; later cells pick one by index
# (projects[3], i.e. "ventral-gait", is the one used throughout this notebook).
projects = ["fly-kevin", "hand-track", "superbird", "ventral-gait"]

### PyTorch to ONNX

In [None]:
# In case you do not have a .onnx model exported, use this cell to export your DLC3.0 snapshot

from deeplabcut.pose_estimation_pytorch.config import read_config_as_dict
from deeplabcut.pose_estimation_pytorch.models import PoseModel
import torch
import onnxruntime as ort

# Only used as the map_location when loading the snapshot below.
device = "cuda" if torch.cuda.is_available() else "cpu"


# Dikra
root = Path("/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/" + projects[3])
model_cfg = read_config_as_dict(root / "pytorch_config.yaml")
weights_path = root / "snapshot-263.pt"

# Anna
# root = Path("/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Ventral_gait_model/train")
# model_cfg = read_config_as_dict(root / "pytorch_config.yaml")
# weights_path = root / "snapshot-263.pt"

# Build the architecture from the config, then load the trained weights stored
# under the "model" key of the snapshot.
model = PoseModel.build(model_cfg["model"])
weights = torch.load(weights_path, map_location=device)
model.load_state_dict(weights["model"])
# Put the model in inference mode explicitly before tracing it for export.
model.eval()

# Example input used for the export; batch size, height and width are declared
# dynamic in `dynamic_axes` below.
dummy_input = torch.zeros((1, 3, 224, 224))

# Write the ONNX file next to the snapshot. The path is derived from `root` so that
# switching the Dikra/Anna configuration above also switches the export location
# (for the Dikra configuration this is the same path as before).
torch.onnx.export(
    model,
    dummy_input,
    str(root / "resnet.onnx"),
    verbose=False,
    input_names=["input"],
    dynamic_axes={"input": {0: "batch_size", 2: "height", 3: "width"}},
)

### Quant ONNX

In [None]:
# FP32 to FP16
from onnxconverter_common import float16

# Source (FP32) and destination (FP16) model files inside the selected project folder.
onnx_fp32_model_path = f"/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/{projects[3]}/resnet.onnx"
onnx_fp16_model_path = f"/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/{projects[3]}/resnet_fp16.onnx"

# Load the FP32 graph, convert it to float16 and write the result to disk.
model_fp32 = onnx.load(onnx_fp32_model_path)
model_fp16 = float16.convert_float_to_float16(model_fp32)
onnx.save(model_fp16, onnx_fp16_model_path)

In [None]:
# Input (exported FP32 model) and output (pre-processed model) files in the selected project folder.
onnx_fp32_model_path = f"/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/{projects[3]}/resnet.onnx"
model_prep_path = f"/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/{projects[3]}/resnet_quant_prep.onnx"

# prep for quantisation
quantization.shape_inference.quant_pre_process(
    onnx_fp32_model_path,
    model_prep_path,
    skip_symbolic_shape=False,
)

In [None]:
# Load test frame
img_path = (
    "/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/" + projects[3] + "/img0006.png"
)
img = cv2.imread(img_path)
# cv2.imread reports a missing or unreadable file by returning None instead of raising,
# so fail here with a clear message rather than later inside an inference call.
if img is None:
    raise FileNotFoundError(f"Could not read test frame: {img_path}")

### DLC Live with ONNX exported DLC 3.0 model

In [None]:
# Dikra
# Create a DLCLive instance pointed at the selected project folder, configured for
# an ONNX model on the GPU with FP16 precision and the display enabled.
onnx_dlc_live = DLCLive(
    path="/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/" + projects[3],
    model_type="onnx",
    device="cuda",
    display=True,
    precision="FP16",
)

# Anna
# onnx_dlc_live = DLCLive(pytorch_cfg="/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Ventral_gait_model/train", processor=dlc_proc, snapshot='/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Ventral_gait_model/train/snapshot-263.pt')
# onnx_dlc_live = DLCLive("/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/exported DLC model for dlc-live/DLC_dev-single-animal_resnet_50_iteration-1_shuffle-1", processor=dlc_proc)
# img = cv2.imread("/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/exported DLC model for dlc-live/img049.png")

# First inference on the test frame `img` (loaded in an earlier cell); the result is
# displayed as the cell output.
onnx_pose = onnx_dlc_live.init_inference(frame=img)
onnx_pose

![ONNX model inference](./docs/assets/Screenshot%20from%202024-08-20%2014-36-00.png)

In [None]:
# Boolean mask over the rows of the first pose entry whose third column exceeds 0.9.
# NOTE(review): presumably column 2 is the keypoint confidence and the value is a
# torch tensor (torch.any is applied to the mask) — confirm against DLCLive's output.
detected = onnx_pose[0]["poses"][0][0][:, 2] > 0.9
# Report whether at least one row passed the threshold.
print(torch.any(detected))
# Columns 0 and 1 of the rows that passed the threshold
# (presumably the x and y coordinates — confirm).
x = onnx_pose[0]["poses"][0][0][detected, 0]
y = onnx_pose[0]["poses"][0][0][detected, 1]
# Ad-hoc slice of the raw output, shown as the cell output for inspection.
onnx_pose[0]["poses"][:, :, :, 1][:, :2]

In [None]:
# Run inference on the same frame again, this time through get_pose
# (init_inference was already called in an earlier cell), and display the result.
onnx_pose = onnx_dlc_live.get_pose(frame=img)
onnx_pose

In [None]:
# Tear down the display attached to the ONNX DLCLive instance (created with display=True).
onnx_dlc_live.display.destroy()

### DLC Live with snapshot of DLC 3.0 model (.pt)

In [None]:
# Dikra
# Create a DLCLive instance that loads the PyTorch snapshot "snapshot-263.pt" from the
# ventral-gait project folder, running on the GPU with the display enabled.
pytorch_dlc_live = DLCLive(
    path="/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/ventral-gait",
    snapshot="snapshot-263.pt",
    device="cuda",
    model_type="pytorch",
    display=True,
)

# Anna
# pytorch_dlc_live = DLCLive(pytorch_cfg="/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Ventral_gait_model/train", processor=dlc_proc, snapshot='/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Ventral_gait_model/train/snapshot-263.pt')
# pytorch_dlc_live = DLCLive("/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/exported DLC model for dlc-live/DLC_dev-single-animal_resnet_50_iteration-1_shuffle-1", processor=dlc_proc)
# img = cv2.imread("/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/exported DLC model for dlc-live/img049.png")

# First inference on the test frame `img` (loaded in an earlier cell); the result is
# displayed as the cell output.
pytorch_pose = pytorch_dlc_live.init_inference(frame=img)
pytorch_pose

In [None]:
# Tear down the display attached to the PyTorch DLCLive instance (created with display=True).
pytorch_dlc_live.display.destroy()

![PyTorch model inference](./docs/assets/Screenshot%20from%202024-08-20%2014-29-53.png)

### Which is faster?

In [None]:
import glob
import os

# Folder holding the benchmark frames. Note that this rebinds `root`, which an earlier
# cell defines as a pathlib.Path, to a plain string.
root = "/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/ventral-gait"
# glob returns matches in arbitrary order; sort so the benchmark always processes the
# frames in the same order (the first one is the frame used for init_inference).
test_images = sorted(glob.glob(os.path.normpath(root + "/*.png")))


def mean_time_inference(dlc_live, images):
    """Time one inference call per image and return the mean duration in seconds.

    The first image is passed to ``dlc_live.init_inference`` and every later image to
    ``dlc_live.get_pose``, so the first timing includes whatever one-off work
    ``init_inference`` performs. Reading the image from disk is not part of the timing.
    The list of individual timings is printed before returning.

    Args:
        dlc_live: Object exposing ``init_inference(frame)`` and ``get_pose(frame)``.
        images: Iterable of image file paths readable by ``cv2.imread``.

    Returns:
        The mean of the per-image durations in seconds, or ``nan`` when ``images``
        is empty.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read one of the images.
    """
    times = []
    for i, img_p in enumerate(images):
        img = cv2.imread(img_p)
        if img is None:
            # cv2.imread returns None instead of raising on unreadable files.
            raise FileNotFoundError(f"Could not read image: {img_p}")

        # Only the first frame goes through init_inference.
        run_inference = dlc_live.init_inference if i == 0 else dlc_live.get_pose

        # perf_counter is the monotonic, high-resolution clock intended for
        # measuring short intervals (time.time can jump with wall-clock changes).
        start = time.perf_counter()
        run_inference(img)
        times.append(time.perf_counter() - start)
    print(times)

    if not times:
        # Avoid numpy's "mean of empty slice" warning; the result is NaN either way.
        return float("nan")
    return np.mean(times)

In [None]:
# Benchmark the ONNX model on the GPU over all test images.
dlc_live = DLCLive(
    path="/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/ventral-gait",
    device="cuda",
    model_type="onnx",
    display=True,
)

# NOTE(review): display=True is set for this run, so any display overhead presumably
# lands inside the measured times — confirm before comparing with the other runs.
mean_time = mean_time_inference(dlc_live, test_images)
print(
    f"TOTAL Inference of ONNX model took on average {mean_time} seconds for {len(test_images)} images"
)

In [None]:
# Benchmark the PyTorch snapshot on the GPU over all test images.
# NOTE(review): this cell points at the parent folder ".../dlc-live-dummy" and at
# "snapshot-200.pt", whereas the earlier PyTorch cell uses ".../dlc-live-dummy/ventral-gait"
# with "snapshot-263.pt" — verify that this path and snapshot are intended.
dlc_live = DLCLive(
    path="/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy",
    snapshot="snapshot-200.pt",
    device="cuda",
    model_type="pytorch",
)

mean_time = mean_time_inference(dlc_live, test_images)
print(
    f"Inference of PyTorch model took on average {mean_time} seconds for {len(test_images)} images"
)

In [None]:
# Benchmark the ONNX model with the "tensorrt" device setting over all test images.
dlc_live = DLCLive(
    path="/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/" + projects[3],
    device="tensorrt",
    model_type="onnx",
)

mean_time = mean_time_inference(dlc_live, test_images)
# This run uses the ONNX model with TensorRT, not the PyTorch snapshot, so label it as such.
print(
    f"Inference of ONNX model (TensorRT) took on average {mean_time} seconds for {len(test_images)} images"
)

In [None]:
# Time a single get_pose call on the test frame with the most recently created dlc_live.
# Note: the elapsed time (end - start) is not printed or stored by this cell.
start = time.time()
dlc_live.get_pose(img)
end = time.time()

# Benchmarking

Currently, the `benchmark_pytorch.py` script provides a function (`analyze_video`) for analyzing a pre-recorded video, so that PyTorch video inference in DLC-Live can be tested. Code for running inference on a live video feed is a work in progress.

For true benchmarking purposes, we aim to add a feature that records the time it takes to analyze each frame and how many frames can be analyzed per second. **TODO:** discuss which measure to use and consult the DLC Live paper.

In [None]:
%load_ext autoreload
%autoreload 2

# Import the analyze_video function from the file where it's defined
from dlclive.benchmark_pytorch import analyze_video

In [None]:
# New version with DLCLive object included in the code


# Define the paths
video_path = "/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/ventral-gait/1_20cms_0degUP_first_03s.avi"
model_path = "/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/ventral-gait"

# Optional profiling scaffold (disabled); the matching report code is at the end of the cell.
# import cProfile
# import io
# import pstats

# pr = cProfile.Profile()
# pr.enable()

# Call the analyze_video function with the appropriate arguments
# NOTE(review): the result is bound to a single name here, while a later cell unpacks
# `poses, times = analyze_video(...)` — confirm which return shape is current.
poses = analyze_video(
    video_path=video_path,
    model_path=model_path,
    model_type="onnx",
    device="cuda",
    display=True,
    save_poses=True,
    resize=0.5,
    precision = "FP16",
    # cropping= [50, 250, 100, 450], # manually set the cropping to specific pixels
    dynamic=(
        True,
        0.5,
        10,
    ),  # True = we want to apply dynamic cropping, 0.5 = the threshold for accepting a KP as detected, 10 = the margin to expand the calculated cropping window by so it is not too narrow
    save_dir="output_directory",
    get_sys_info=True,
    draw_keypoint_names=True,
)

# 'poses' will contain the list of poses detected

# # Create a stream to capture the profiler's output
# s = io.StringIO()
# sortby = 'cumulative'
# ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
# ps.print_stats()

# # Print the profiling output
# print(s.getvalue())

In [None]:
# hand model and video

# Define the paths (these rebind `video_path` and `model_path` from the previous cell)
video_path = "/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Hand-AnnaStuckert-2024-08-21/videos/Hand.avi"
model_path = "/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Hand-AnnaStuckert-2024-08-21/dlc-models-pytorch/iteration-0/HandAug21-trainset95shuffle101/train"


# Call the analyze_video function with the appropriate arguments
# (same settings as the ventral-gait cell except resize=0.4 and no explicit precision)
poses = analyze_video(
    video_path=video_path,
    model_path=model_path,
    model_type="onnx",
    device="cuda",
    display=True,
    save_poses=True,
    resize=0.4,
    # cropping= [50, 250, 100, 450], # manually set the cropping to specific pixels
    dynamic=(
        True,
        0.5,
        10,
    ),  # True = we want to apply dynamic cropping, 0.5 = the threshold for accepting a KP as detected, 10 = the margin to expand the calculated cropping window by so it is not too narrow
    save_dir="output_directory",
    get_sys_info=True,
    draw_keypoint_names=True,
)

In [None]:
from dlclive import DLCLive
import cv2
import numpy as np
from pathlib import Path
import time
from onnxruntime import quantization
import onnx
from dlclive.benchmark_pytorch import analyze_video

In [None]:
# test download of benchmarking dataset
# NOTE: the link is not working; waiting for an updated link to the benchmarking dataset

# NOTE(review): this DLCLive instance is configured with model_type="onnx", but it is not
# passed to analyze_video below, which is called with model_type="pytorch" — confirm
# whether this instance is still needed (a later cell only calls dlc_live.display.destroy()).
dlc_live = DLCLive(
    path="/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Ventral_gait_model/train",
    device="cpu",
    # snapshot="snapshot-263.pt",
    model_type="onnx",
    display=True,
    precision="FP16",
)
# short video
video_path = '/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Ventral_gait_model/1_20cms_0degUP_first.avi'
#video_path = "/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/ventral-gait/1_20cms_0degUP_first.avi"

# Analyze the video on the CPU with the PyTorch snapshot. The result is unpacked into
# two values here, unlike the earlier analyze_video cells, which bind a single name.
poses, times = analyze_video(
    video_path=video_path,
    model_type="pytorch",
    snapshot = "snapshot-263.pt",
    device="cpu",
    #precision="FP16",
    model_path="/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Ventral_gait_model/train",
    display=True,
    save_poses=False,
    save_dir="/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Ventral_gait_model/train/out",
    draw_keypoint_names=False,
)

In [None]:
# Tear down the display attached to the most recently created dlc_live instance.
dlc_live.display.destroy()

In [None]:
# Rebuild `times` from `poses`, replacing the `times` value unpacked from analyze_video
# in the earlier cell.
# NOTE(review): assumes each entry of `poses` is a dict whose "pose" value holds the
# per-frame inference time (in seconds) at index 1 — confirm against analyze_video.
times = [p["pose"][1] for p in poses]

In [None]:
# Report mean ± standard deviation of the per-frame times in milliseconds, first without
# and then with the first inference (`times` is multiplied by 1000 to convert to ms).
for label, samples in (
    ("Mean inference time excluding 1st inference ", times[1:]),
    ("Mean inference time including 1st inference ", times),
):
    mean_ms = np.round(np.mean(samples) * 1000, 2)
    std_ms = np.round(np.std(samples) * 1000, 2)
    print(label, mean_ms, "ms ±", std_ms)

In [None]:
import matplotlib.pyplot as plt

# Plot the per-frame times, skipping the first entry (the first inference).
plt.plot(times[1:])

In [None]:
import numpy as np


def calculate_fps_stats(inference_times):
    """
    Calculates the average FPS rate and its standard deviation from per-frame inference times.

    Each inference time is converted to an instantaneous rate (``1 / time``); the mean and
    the population standard deviation (``np.std`` default) of those rates are returned.

    Args:
        inference_times (iterable): Inference times in seconds, one per frame.

    Returns:
        tuple: ``(average_fps, std_dev_fps)``. Both are NaN when no inference times
        are given.

    Raises:
        ZeroDivisionError: If a plain Python number in ``inference_times`` is zero.
    """
    # Materialise once so generators and other one-shot iterables are supported.
    durations = list(inference_times)

    # Empty input: return NaNs explicitly instead of triggering numpy's
    # "mean of empty slice" warnings (the numeric result is the same).
    if not durations:
        return float("nan"), float("nan")

    # Calculate FPS for each frame. The loop variable is deliberately not called
    # `time`, to avoid confusion with the `time` module imported elsewhere in the file.
    fps_values = [1 / duration for duration in durations]

    average_fps = np.mean(fps_values)
    std_dev_fps = np.std(fps_values)

    return average_fps, std_dev_fps


# Compute FPS statistics over all recorded times (including the first inference)
# and print them.
average_fps, std_dev_fps = calculate_fps_stats(times)

print("Average FPS:", average_fps)
print("Standard Deviation of FPS:", std_dev_fps)

In [None]:
import torch


def _checkpoint_tensors(checkpoint):
    """Return the tensors held by a loaded PyTorch checkpoint.

    Accepts a dict with a "model" entry (as used by the snapshots loaded elsewhere in
    this file), a bare state dict, or an object exposing ``state_dict()``.
    """
    if isinstance(checkpoint, dict):
        # Snapshots keep the weights under "model"; fall back to the dict itself.
        checkpoint = checkpoint.get("model", checkpoint)
    if isinstance(checkpoint, dict):
        return [value for value in checkpoint.values() if torch.is_tensor(value)]
    if hasattr(checkpoint, "state_dict"):
        return [value for value in checkpoint.state_dict().values() if torch.is_tensor(value)]
    raise ValueError(
        f"Unsupported checkpoint content of type {type(checkpoint).__name__}"
    )


def get_model_size(model_path):
    """
    Calculates the size of a model and returns it as a human-readable string.

    For ``.onnx`` files the size is the length of the serialized ONNX model. For PyTorch
    checkpoints (``.pt`` / ``.pth``) it is the total number of bytes occupied by the
    tensors of the checkpoint's state dict.

    Args:
        model_path (str or path-like): The path to the model file.

    Returns:
        str: The size formatted in B, KB, MB or GB (for example ``"1.00 KB"``).

    Raises:
        ValueError: If the file extension is not supported, or the checkpoint content
            is not a state dict / module.
    """
    path_str = str(model_path)
    lowered = path_str.lower()

    if lowered.endswith(".onnx"):
        model = onnx.load(path_str)
        size_bytes = len(model.SerializeToString())
    elif lowered.endswith((".pt", ".pth")):
        # NOTE: torch.load unpickles the file; only use it on trusted checkpoints.
        # map_location="cpu" lets the size be computed on machines without a GPU.
        checkpoint = torch.load(path_str, map_location="cpu")
        size_bytes = sum(
            tensor.numel() * tensor.element_size()
            for tensor in _checkpoint_tensors(checkpoint)
        )
    else:
        raise ValueError(
            f"Unsupported model file (expected .onnx, .pt or .pth): {path_str}"
        )

    # Convert to KB, MB, GB, etc.
    if size_bytes < 1024:
        size_str = f"{size_bytes} B"
    elif size_bytes < 1024 * 1024:
        size_str = f"{size_bytes / 1024:.2f} KB"
    elif size_bytes < 1024 * 1024 * 1024:
        size_str = f"{size_bytes / (1024 * 1024):.2f} MB"
    else:
        size_str = f"{size_bytes / (1024 * 1024 * 1024):.2f} GB"

    return size_str


# Show the size of the FP16 ONNX model written by the FP32-to-FP16 conversion cell.
get_model_size(
    "/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/ventral-gait/resnet_fp16.onnx"
)

In [None]:
import cv2


def get_video_characteristics(video_path):
    """
    Extracts the FPS, number of frames, length in seconds, and frame size of a video.

    Args:
        video_path (str): The path to the video file.

    Returns:
        tuple: ``(fps, frame_count, video_length, (frame_width, frame_height))``.
        ``fps`` and ``frame_count`` are the values reported by OpenCV; ``video_length``
        is ``frame_count / fps`` in seconds, or NaN when the reported FPS is not positive.

    Raises:
        IOError: If the video cannot be opened.
    """

    cap = cv2.VideoCapture(video_path)
    try:
        # A capture that failed to open would otherwise produce a confusing
        # division error further down.
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        # Get video properties
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Always release the capture, even when opening or reading properties fails.
        cap.release()

    # Calculate video length in seconds; guard against a missing/zero FPS value.
    video_length = frame_count / fps if fps > 0 else float("nan")

    return fps, frame_count, video_length, (frame_width, frame_height)


# Example usage: read the properties of the ventral-gait benchmark video and print them.
# Note that this rebinds `video_path` from earlier cells.
video_path = "/media/dikra/PhD/DATA/DLC24_Data/dlc-live-dummy/ventral-gait/1_20cms_0degUP_first.avi"
fps, frame_count, video_length, frame_size = get_video_characteristics(video_path)

print("FPS:", fps)
print("Number of frames:", frame_count)
print("Video length (seconds):", video_length)
print("Frame size:", frame_size)

# Live video analysis

In [1]:

# Running the analyze_live_video function in a Jupyter notebook

from dlclive.LiveVideoInference import analyze_live_video

# Define the paths
model_path = "/Users/annastuckert/Documents/DLC_AI_Residency/DLC_AI2024/DeepLabCut-live/Hand-AnnaStuckert-2024-08-21/dlc-models-pytorch/iteration-0/HandAug21-trainset95shuffle101/train"

# Call the analyze_live_video function with the appropriate arguments
# (PyTorch snapshot "snapshot-200.pt" on the CPU, frames taken from camera 0;
# precision, cropping and dynamic cropping are left commented out)
poses = analyze_live_video(
    camera=0,
    model_path=model_path,
    model_type="pytorch",
    snapshot="snapshot-200.pt",
    device="cpu",
    display=True,
    save_poses=True,
    resize=0.5,
    #precision="FP16",
    #cropping=[50, 250, 100, 450],  # manually set the cropping to specific pixels
    #dynamic=(
    #    True,
    #    0.5,
    #    10,
    #),  # True = apply dynamic cropping, 0.5 = threshold for KP detection, 10 = margin for cropping
    save_dir="output_directory",
    get_sys_info=True,
    draw_keypoint_names=True,
)


Loading DLC 3.0.0rc2...


  from .autonotebook import tqdm as notebook_tqdm


Loading the model took 6.620182991027832 sec
PyTorch inference took 0.30005407333374023 sec
PyTorch postprocessing took 0.012089967727661133 sec
Frame 0 processing time: 7.5922 seconds
PyTorch inference took 0.29785704612731934 sec
PyTorch postprocessing took 0.0017039775848388672 sec
Frame 1 processing time: 0.3845 seconds
PyTorch inference took 0.40530920028686523 sec
PyTorch postprocessing took 0.0023298263549804688 sec
Frame 2 processing time: 0.4795 seconds
PyTorch inference took 0.5319540500640869 sec
PyTorch postprocessing took 0.0025599002838134766 sec
Frame 3 processing time: 0.5984 seconds
PyTorch inference took 0.49686288833618164 sec
PyTorch postprocessing took 0.0032427310943603516 sec
Frame 4 processing time: 0.7457 seconds
PyTorch inference took 0.37366414070129395 sec
PyTorch postprocessing took 0.001766204833984375 sec
Frame 5 processing time: 0.4404 seconds
PyTorch inference took 0.3685030937194824 sec
PyTorch postprocessing took 0.0015978813171386719 sec
Frame 6 proc