In [None]:
from DeepLabCutImplementation.model import PoseModel

from Wrappers.backbone import get_HRNetCoAM_backbone
from Wrappers.head import get_heatmap_head

from DeepLabCutImplementation.runners.train import PoseTrainingRunner
from DeepLabCutImplementation.runners.snapshots import TorchSnapshotManager
from DeepLabCutImplementation.runners.schedulers import LRListScheduler
from DeepLabCutImplementation.runners.logger import CSVLogger

from DeepLabCutImplementation.task import Task
from DeepLabCutImplementation.data.transforms import CoarseDropout

from DeepLabCutImplementation.data.cocoloader import COCOLoader

import albumentations as A
import logging
from torch.optim import AdamW
from torch.utils.data import DataLoader

import os
import gc
from pathlib import Path
import torch

In [2]:
# Keyword-argument sets for the backbone variants under comparison; each one is
# expanded into `get_HRNetCoAM_backbone(**cfg)` when the models are built.
backbone1 = dict(
    num_joints=17,
    att_heads=1,
    channel_att_only=False,
    coam_modules=(2,),
    selfatt_coam_modules=None,
)

backbone2 = dict(
    num_joints=17,
    att_heads=2,
    channel_att_only=True,
    coam_modules=(1, 3),
    selfatt_coam_modules=None,
)

backbone3 = dict(
    num_joints=17,
    att_heads=4,
    channel_att_only=False,
    coam_modules=(),
    selfatt_coam_modules=(1, 3),
)

# Order matters: a config's position here determines the index of the models
# built from it.
backbones = [backbone1, backbone2, backbone3]


# Keyword-argument sets for the heatmap-head variants; each one is expanded
# into `get_heatmap_head(**cfg)` when the models are built.
bodypart_head1 = dict(
    num_joints=17,
    channels=[48],
    kernel_sizes=[],
    kernel_size_final=1,
    strides=[],
)

bodypart_head2 = dict(
    num_joints=17,
    channels=[48, 17],
    kernel_sizes=[6],
    kernel_size_final=1,
    strides=[8],
)

bodypart_head3 = dict(
    num_joints=17,
    channels=[48, 24],
    kernel_sizes=[3],
    kernel_size_final=3,
    strides=[4],
)

# Order matters: a config's position here determines the index of the models
# built from it.
bodypart_heads = [bodypart_head1, bodypart_head2, bodypart_head3]

# Build one PoseModel for every (backbone, head) pairing. The list is
# backbone-major, i.e. the model for backbone b and head h sits at index
# b * len(bodypart_heads) + h.
models = []

for backbone_kwargs in backbones:
    for head_kwargs in bodypart_heads:
        # A fresh backbone and head are instantiated for every pairing, so no
        # two models share module instances.
        models.append(
            PoseModel(
                backbone=get_HRNetCoAM_backbone(**backbone_kwargs),
                heads={"bodypart": get_heatmap_head(**head_kwargs)},
            )
        )


Unexpected keys (downsamp_modules.0.1.num_batches_tracked, downsamp_modules.1.1.num_batches_tracked, downsamp_modules.2.1.num_batches_tracked, final_layer.1.num_batches_tracked, incre_modules.0.0.bn1.num_batches_tracked, incre_modules.0.0.bn2.num_batches_tracked, incre_modules.0.0.bn3.num_batches_tracked, incre_modules.0.0.downsample.1.num_batches_tracked, incre_modules.1.0.bn1.num_batches_tracked, incre_modules.1.0.bn2.num_batches_tracked, incre_modules.1.0.bn3.num_batches_tracked, incre_modules.1.0.downsample.1.num_batches_tracked, incre_modules.2.0.bn1.num_batches_tracked, incre_modules.2.0.bn2.num_batches_tracked, incre_modules.2.0.bn3.num_batches_tracked, incre_modules.2.0.downsample.1.num_batches_tracked, incre_modules.3.0.bn1.num_batches_tracked, incre_modules.3.0.bn2.num_batches_tracked, incre_modules.3.0.bn3.num_batches_tracked, incre_modules.3.0.downsample.1.num_batches_tracked, downsamp_modules.0.0.bias, downsamp_modules.0.0.weight, downsamp_modules.0.1.bias, downsamp_module

In [3]:
# Validation-time pipeline: per-channel normalisation only, no augmentation.
# Keypoints are plain (x, y) pairs that are kept even when they fall outside
# the image; bounding boxes use the COCO layout.
inference_transforms = A.Compose(
    [A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])],
    keypoint_params=A.KeypointParams(
        "xy", remove_invisible=False, label_fields=["class_labels"]
    ),
    bbox_params=A.BboxParams(format="coco", label_fields=["bbox_labels"]),
)

In [4]:
# Training-time pipeline. Every augmentation fires independently with
# probability 0.5; normalisation always runs and always runs last.
train_transforms = A.Compose(
    [
        # Geometric jitter: scale, rotation (degrees) and pixel translation.
        A.Affine(
            scale=[0.75, 1.25],
            rotate=(-45, 45),
            translate_px=(-50, 50),
            p=0.5,
            keep_ratio=True,
        ),
        A.MotionBlur(p=0.5),
        # Project-local CoarseDropout (imported from DeepLabCutImplementation).
        CoarseDropout(
            max_holes=10,
            min_holes=1,
            max_height=0.05,
            min_height=0.01,
            max_width=0.05,
            min_width=0.01,
            p=0.5,
        ),
        # Noise variance is capped at 12.75 squared.
        A.GaussNoise(
            var_limit=(0, 12.75**2),
            mean=0,
            per_channel=True,
            p=0.5,
        ),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ],
    keypoint_params=A.KeypointParams(
        "xy", remove_invisible=False, label_fields=["class_labels"]
    ),
    bbox_params=A.BboxParams(format="coco", label_fields=["bbox_labels"]),
)

In [5]:
# COCO-format loader for split 1 of the AP-10K annotations found under
# dataset/ap-10k. The "train" entries repeat the augmentation settings used by
# the albumentations training pipeline in this notebook.
# NOTE(review): whether COCOLoader still consumes them when explicit transforms
# are passed to create_dataset is not visible here - confirm.
data_loader = COCOLoader(
    project_root="dataset/ap-10k",
    data_config=dict(
        bbox_margin=25,
        colormode="RGB",
        inference=dict(
            multithreading=dict(enabled=True, queue_length=4, timeout=30),
            normalize_images=True,
            top_down_crop=dict(width=256, height=256, crop_with_context=False),
        ),
        train=dict(
            affine=dict(p=0.5, rotation=45, scaling=[0.75, 1.25], translation=50),
            gaussian_noise=12.75,
            motion_blur=True,
            normalize_images=True,
            top_down_crop=dict(width=256, height=256, crop_with_context=False),
            covering=True,
        ),
        gen_sampling=dict(keypoint_sigmas=0.1),
    ),
    train_json_filename="ap10k-train-split1.json",
    test_json_filename="ap10k-val-split1.json",
)

In [6]:
def train(
    loader,
    runner,
    model,
    task,
    device: str | None = "cuda",
    transform: A.BaseCompose | None = None,
    inference_transform: A.BaseCompose | None = None,
) -> None:
    """Builds a model from a configuration and fits it to a dataset

    Args:
        loader: the loader containing the data to train on/validate with
        run_config: the model and run configuration
        task: the task to train the model for
        device: the torch device to train on (such as "cpu", "cuda", "mps")
        gpus: the list of GPU indices to use for multi-GPU training
        logger_config: the configuration of a logger to use
        snapshot_path: if continuing to train from a snapshot, the path containing the
            weights to load
        transform: if defined, overwrites the transform defined in the model config
        inference_transform: if defined, overwrites the inference transform defined in
            the model config
        max_snapshots_to_keep: the maximum number of snapshots to store for each model
        load_head_weights: When `snapshot_path` is not None and a pose model is being
            trained, whether to load the head weights from the saved snapshot.
    """
    model.to(device)

    logging.info("Data Transforms:")
    logging.info(f"  Training:   {transform}")
    logging.info(f"  Validation: {inference_transform}")

    train_dataset = loader.create_dataset(transform=transform, mode="train", task=task)
    valid_dataset = loader.create_dataset(
        transform=inference_transform, mode="test", task=task
    )

    collate_fn = None

    batch_size = 16
    num_workers = 8
    pin_memory = True
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
    valid_dataloader = DataLoader(valid_dataset, batch_size=1, shuffle=False)

    logging.info(
        f"Using {len(train_dataset)} images and {len(valid_dataset)} for testing"
    )

    logging.info("\nStarting pose model training...\n" + (50 * "-"))

    runner.fit(
        train_dataloader,
        valid_dataloader,
        epochs=10,
        display_iters=200,
    )

In [7]:
# Lower the root logger threshold so the INFO records emitted during training
# are not filtered out.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

In [None]:
starting_lr = 0.001

# Index of the first architecture to train; entries of `models` before it are
# skipped, while the output folders keep the absolute index.
start_model = 0

for arch_idx, model in enumerate(models[start_model:], start=start_model):
    # Start each run from a clean slate: collect garbage, then hand cached GPU
    # blocks back to the driver.
    gc.collect()
    with torch.no_grad():
        torch.cuda.empty_cache()

    run_dir = f"training_logs/model_architecture{arch_idx}"
    snapshot_dir = f"{run_dir}/snapshots"
    os.makedirs(snapshot_dir, exist_ok=True)

    # A fresh loader (and fresh config dicts) is built for every architecture.
    inference_cfg = {
        "multithreading": {
            "enabled": True,
            "queue_length": 4,
            "timeout": 30,
        },
        "normalize_images": True,
        "top_down_crop": {
            "width": 256,
            "height": 256,
            "crop_with_context": False,
        },
    }
    train_cfg = {
        "affine": {
            "p": 0.5,
            "rotation": 45,
            "scaling": [0.75, 1.25],
            "translation": 50,
        },
        "gaussian_noise": 12.75,
        "motion_blur": True,
        "normalize_images": True,
        "top_down_crop": {
            "width": 256,
            "height": 256,
            "crop_with_context": False,
        },
        "covering": True,
    }
    data_loader = COCOLoader(
        project_root="dataset/ap-10k",
        data_config={
            "bbox_margin": 25,
            "colormode": "RGB",
            "inference": inference_cfg,
            "train": train_cfg,
            "gen_sampling": {"keypoint_sigmas": 0.1},
        },
        train_json_filename="ap10k-train-split1.json",
        test_json_filename="ap10k-val-split1.json",
    )

    optimizer = AdamW(model.parameters(), lr=starting_lr)

    # The snapshot manager, scheduler and logger are deliberately created
    # inline: binding them to names would keep the optimizer reachable after
    # the `del` below and delay the release of its state.
    runner = PoseTrainingRunner(
        model=model,
        optimizer=optimizer,
        device="cuda",
        snapshot_manager=TorchSnapshotManager(
            snapshot_prefix="snapshot",
            model_folder=Path(snapshot_dir),
            key_metric="test.mAP",
            key_metric_asc=True,
            max_snapshots=2,
            save_epochs=5,
            save_optimizer_state=False,
        ),
        eval_interval=1,
        # Learning rate is divided by 5 at each of the three milestones.
        scheduler=LRListScheduler(
            optimizer,
            milestones=[6, 8, 9],
            lr_list=[starting_lr * factor for factor in (0.2, 0.04, 0.008)],
        ),
        load_scheduler_state_dict=True,
        logger=CSVLogger(train_folder=run_dir, log_filename="training_log.csv"),
        load_weights_only=True,
    )

    train(
        data_loader,
        runner,
        model,
        task=Task.COND_TOP_DOWN,
        device="cuda",
        transform=train_transforms,
        inference_transform=inference_transforms,
    )

    # `models` still references the network, so `del` alone cannot release its
    # weights; moving it to the CPU is what takes them off the GPU.
    model.cpu()
    del model, runner, optimizer
    gc.collect()
    with torch.no_grad():
        torch.cuda.empty_cache()