# Let's try to figure out how to develop RTMDet

## First, let's understand the difference between CSPNeXt and RTMPose

### (Is RTMPose just CSPNeXt + a SimCC head?)

In [None]:
import torch
# Random dummy input, reused by the model probes in this notebook. Presumably
# laid out as (batch, channels, height, width) -- TODO confirm against the
# backbone's documentation.
x = torch.randn(1, 3, 256, 256)

# ----

# First, instantiate the bare CSPNeXt backbone with its default constructor arguments.
from deeplabcut.pose_estimation_pytorch.models.backbones.cspnext import CSPNeXt

cspnext = CSPNeXt()
# Observed: the type is
# deeplabcut.pose_estimation_pytorch.models.backbones.cspnext.CSPNeXt,
# which is an nn.Module.
print(type(cspnext))
# Total number of elements summed over everything returned by parameters().
print(f"Nb parameters: {sum(p.numel() for p in cspnext.parameters())}")
# print(cspnext) # This floods the output
# Forward the dummy input only to inspect the shape of what comes out.
# NOTE(review): no eval()/no_grad() is applied before this call -- fine for a
# shape check, but confirm that is intended.
y = cspnext(x)
print(f"Shape of output of dummy tensor: {y.shape}")

# Visual separator before whatever is printed next.
print("-"* 80)

# Next, build an RTMPose model (an nn.Module) from a config file found in
# DeepLabCut's config folder.
from deeplabcut.pose_estimation_pytorch.config.utils import get_config_folder_path, replace_default_values
from deeplabcut.core.config import read_config_as_dict
from deeplabcut.pose_estimation_pytorch.models import PoseModel

net_type = "rtmpose_m"  # for example
nb_bodyparts = 5  # for example

# The config is looked up as <configs_dir>/<architecture>/<net_type>.yaml, where
# the architecture is the part of net_type before the first underscore.
configs_dir = get_config_folder_path()
architecture = net_type.split("_")[0]
cfg_path = configs_dir / architecture / f"{net_type}.yaml"
model_cfg = read_config_as_dict(cfg_path)
# Interesting observation: the yaml file defines values that depend on
# nb_individuals and/or nb_bodyparts, and they are updated with real values once
# known (when creating the actual pytorch_config.yaml).
model_cfg = replace_default_values(
    model_cfg,
    num_bodyparts=nb_bodyparts,
)
# TODO: investigate whether PoseModel.build accepts further optional parameters.
rtmpose = PoseModel.build(model_cfg["model"])
print(type(rtmpose))
print(f"Nb parameters: {sum(p.numel() for p in rtmpose.parameters())}")
# print(rtmpose) # This floods the output
z = rtmpose(x)
print(f"Shape of output of dummy tensor: {z['bodypart']['x'].shape} , {z['bodypart']['y'].shape}")

# Notes:
# - The first-level output keys are defined by pytorch_config.yaml (in the heads
#   block); the second-level keys are defined in the RTMCCHead forward method.
# - The first-level keys are presumably added during PoseModel.build.
# - Printing rtmpose shows several submodules (backbone, head); they are
#   presumably created during PoseModel.build() as well.
# - The RTMPose size variants differ from each other in the same way the
#   CSPNeXt size variants do.
# - The relevant parameters (deepen_factor, widen_factor,
#   backbone_output_channels) are configured in the rtmpose yaml files.
# - state_dict() returns a dictionary containing all the model's learnable
#   parameters and buffers, while load_state_dict() restores those values into
#   a model with the same architecture.

## Then, let's dive into how the existing detectors work

In [None]:
from deeplabcut.pose_estimation_pytorch.models.detectors.fasterRCNN import FasterRCNN

# Build a pretrained Faster R-CNN detector on a MobileNetV3 backbone.
# Note: this throws away the head and loads another one, so the head is not
# pretrained anymore.
detector = FasterRCNN(
    variant="fasterrcnn_mobilenet_v3_large_fpn",  # alternative: "fasterrcnn_resnet50_fpn"
    pretrained=True,  # load COCO pretrained weights
    box_score_thresh=0.05,  # filter weak detections
)

# Dummy input batch: two random RGB images, 3x224x224 each
images = [torch.rand(3, 224, 224) for _ in range(2)]

# Inference only, so no targets are passed
detector.eval()
with torch.no_grad():
    losses, detections = detector(images)

print(detections)


## Finally, let's instantiate the official RTMDet network for comparison

In [None]:
# NOTE: this cell must be run in the openmmlab environment.
from mmdet.utils import register_all_modules
register_all_modules()

import torch
from mmengine import Config
from mmdet.registry import MODELS

#x = torch.randn(1, 3, 256, 256)

# Load an RTMDet config; any variant can be used here: tiny, s, m, l, x
rtmdet_cfg = Config.fromfile('/home/max/Work/mmdetection/configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py')

# Build the model from the "model" entry of the config
rtmdet = MODELS.build(rtmdet_cfg.model)

# Switch to evaluation mode. This changes the behaviour of layers such as
# dropout and batch norm; it does not by itself turn off gradient tracking.
rtmdet.eval()

print(type(rtmdet))

print(f"Nb parameters: {sum(p.numel() for p in rtmdet.parameters())}")

#print(rtmdet) # This floods the output
# Types of the three top-level parts of the detector, one per line
print(type(rtmdet.backbone), type(rtmdet.neck), type(rtmdet.bbox_head), sep="\n")


In [None]:
from mmdet.apis import DetInferencer

# Build an inferencer from a local RTMDet-tiny config file and checkpoint,
# running on the CPU.
# NOTE(review): both paths are absolute and machine-specific; the config file is
# expected directly under the mmdetection checkout, next to the weights file.
inferencer = DetInferencer(
    model="/home/max/Work/mmdetection/rtmdet_tiny_8xb32-300e_coco.py",
    weights="/home/max/Work/mmdetection/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth",
    device="cpu",
)

# NOTE(review): the reason for forcing chunked_size to -1 is not visible in this
# cell -- presumably a workaround required before calling the inferencer.
# TODO: document why it is needed.
inferencer.model.test_cfg.chunked_size = -1

# Run the inferencer on the demo image. Presumably results are written under
# out_dir, with predictions saved because no_save_pred is False -- confirm
# against the DetInferencer documentation.
inferencer(
    inputs="/home/max/Work/mmdetection/demo/demo.jpg",
    out_dir="/home/max/Work/mmdetection/outputs",
    no_save_pred=False,
)