# Let's try to figure out how to develop RTMDet

## First, let's understand the difference between CSPNeXt and RTMPose

### (Is RTMPose just CSPNeXt + a SimCC head?)

In [19]:
import torch

from deeplabcut.pose_estimation_pytorch.models.backbones.cspnext import CSPNeXt

# Random dummy input of shape (1, 3, 256, 256); every model in this cell is fed this tensor.
x = torch.randn(1, 3, 256, 256)

# ----

# Step 1: build a CSPNeXt backbone using only its default constructor arguments.
cspnext = CSPNeXt()

# The printed type is deeplabcut.pose_estimation_pytorch.models.backbones.cspnext.CSPNeXt,
# which is a nn.Module.
print(type(cspnext))

cspnext_param_count = sum(param.numel() for param in cspnext.parameters())
print(f"Nb parameters: {cspnext_param_count}")

# Note: print(cspnext) is intentionally left out because it floods the output.

# Forward the dummy tensor to find out what shape the backbone produces.
y = cspnext(x)
print(f"Shape of output of dummy tensor: {y.shape}")

print("-" * 80)

# Next, let's create a RTMPose nn.Module object
from deeplabcut.core.config import read_config_as_dict
from deeplabcut.pose_estimation_pytorch.config.utils import get_config_folder_path, replace_default_values
from deeplabcut.pose_estimation_pytorch.models import PoseModel

net_type = "rtmpose_m" # for example
nb_bodyparts = 5 # for example

# Locate the default model config: <configs folder>/<architecture>/<net_type>.yaml,
# where the architecture is the part of net_type before the first underscore.
configs_dir = get_config_folder_path()
architecture = net_type.split("_")[0]
cfg_path = configs_dir / architecture / f"{net_type}.yaml"
model_cfg = read_config_as_dict(cfg_path)

# Interesting observation: the yaml file defines values that depend on nb_individuals
# and/or nb_bodyparts, and are updated with real values once known (when creating the
# actual pytorch_config.yaml).
model_cfg = replace_default_values(
    model_cfg,
    num_bodyparts=nb_bodyparts,
)

# TODO: investigate whether PoseModel.build accepts further optional parameters.
rtmpose = PoseModel.build(model_cfg["model"])
print(type(rtmpose))
print(f"Nb parameters: {sum(p.numel() for p in rtmpose.parameters())}")

# Note: print(rtmpose) is intentionally left out because it floods the output.

# Forward the same dummy tensor `x` that was fed to the CSPNeXt backbone.
z = rtmpose(x)
print(f"Shape of output of dummy tensor: {z['bodypart']['x'].shape} , {z['bodypart']['y'].shape}")

# Notes on what was observed here:
# - The output is a nested dict. The first-level keys are defined by the pytorch_config.yaml
#   (in the heads block), and the second-level keys are defined in the RTMCCHead forward method.
# - The first-level keys are presumably added when doing PoseModel.build (to be confirmed).
# - When printing rtmpose, there are different submodules (backbone, head). They are
#   presumably created during PoseModel.build() as well (to be confirmed).
# - The differences in sizes of rtmpose are the same ones as the differences in sizes of cspnext.
#   The different parameters (deepen_factor, widen_factor, backbone_output_channels) are
#   configured in the rtmpose yaml files.
# - state_dict() returns a dictionary containing all the model's learnable parameters and buffers,
#   while load_state_dict() restores those values into a model with the same architecture.

<class 'deeplabcut.pose_estimation_pytorch.models.backbones.cspnext.CSPNeXt'>
Nb parameters: 12279432
Shape of output of dummy tensor: torch.Size([1, 768, 8, 8])
--------------------------------------------------------------------------------
<class 'deeplabcut.pose_estimation_pytorch.models.model.PoseModel'>
Nb parameters: 13172879
Shape of output of dummy tensor: torch.Size([1, 5, 512]) , torch.Size([1, 5, 512])


## Then, let's dive into how the existing detectors work

## Finally, let's instantiate the official RTMDet network for comparison