In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the dataset paths used later in this
# notebook all live below this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
!cd /content/drive/MyDrive/horse_pose_estimation/horse_26kp/

In [None]:
# List the current working directory; -F appends a type marker (e.g. a trailing
# "/" for directories) to each entry.
!ls -F

In [None]:
!pip install mmpose mmdet mmcv-lite

# Task
Verify the MMPose installation, then inspect the dataset at `/content/drive/MyDrive/horse_pose_estimation/horse_26kp` to understand its YOLO format, and finally, convert the YOLO annotations to COCO format.

## Change Directory and Inspect Dataset

### Subtask:
Navigate to `/content/drive/MyDrive/horse_pose_estimation/horse_26kp` and list the contents of this directory to examine the `images` folder, `labels` folder, and `dataset.yaml` file, understanding the dataset's structure.


In [None]:
import os
if os.path.exists('images/') and os.path.exists('labels/') and os.path.exists('dataset.yaml'):
    print("Dataset structure appears correct with 'images/', 'labels/', and 'dataset.yaml' present.")
else:
    print("Warning: Expected dataset components (images/, labels/, dataset.yaml) not all found.")
!ls -F

Dataset structure appears correct with 'images/', 'labels/', and 'dataset.yaml' present.
annotations.json
coco_annotations/
correct_mapping.csv
dataset.yaml
diagnostic_n02389026_000000027646_horse.png
diagnostic_n02389026_000000032607_horse.png
diagnostic_n02389026_000000038828_horse.png
diagnostic_n02389026_000000081995_horse.png
diagnostic_n02389026_000000150098_horse.png
diagnostic_n02389026_000000206247_horse.png
diagnostic_n02389026_000000329262_horse.png
diagnostic_n02389026_000000356968_horse.png
diagnostic_n02389026_000000364586_horse.png
diagnostic_n02389026_000000374266_horse.png
diagnostic_n02389026_000000401720_horse.png
diagnostic_n02389026_000000464633_horse.png
diagnostic_n02389026_000000489266_horse.png
diagnostics_visualize.py
images/
labels/
mmpose_configs/
mmpose.ipynb
obj_files/
output_dirs/
skeleton_cvat.svg
skeleton_template.svg
train_subset.py
visualizations/
visualizations.cache
visualize_dataset.py


In [None]:
import os
import json
import yaml
from PIL import Image
import sys

# 1. Define the base directory and output directory
# The notebook must already be running inside the dataset root
# (/content/drive/MyDrive/horse_pose_estimation/horse_26kp); everything below is
# resolved relative to the current working directory.
base_path = os.getcwd()
output_dir = os.path.join(base_path, 'coco_annotations')
os.makedirs(output_dir, exist_ok=True)

print(f"Base directory: {base_path}")
print(f"Output directory for COCO JSONs: {output_dir}")

# 2. Load dataset.yaml
dataset_yaml_path = os.path.join(base_path, 'dataset.yaml')
with open(dataset_yaml_path, 'r') as f:
    dataset_config = yaml.safe_load(f)

# Keypoint names are only present as comments in dataset.yaml, so they cannot be
# read by the YAML parser and are listed here by hand, in annotation order.
keypoint_names = [
    'L_Eye', 'R_Eye', 'Nose', 'L_EarBase', 'R_EarBase', 'L_EarTip', 'R_EarTip', 'Throat', 'Withers',
    'L_F_Elbow', 'R_F_Elbow', 'L_F_Knee', 'R_F_Knee', 'L_F_Paw', 'R_F_Paw',
    'L_B_Elbow', 'R_B_Elbow', 'L_B_Knee', 'R_B_Knee', 'L_B_Paw', 'R_B_Paw',
    'TailBase', 'Spine1', 'Spine2', 'Spine3', 'TailTip'
]

num_keypoints = dataset_config['kpt_shape'][0]

# The hand-written list and the count declared in dataset.yaml must agree,
# otherwise every converted annotation would be silently mislabeled.
if len(keypoint_names) != num_keypoints:
    raise ValueError(
        f"dataset.yaml declares {num_keypoints} keypoints but {len(keypoint_names)} "
        "keypoint names are defined in this notebook."
    )

class_names = dataset_config['names']
# YOLO dataset files may give `names` either as a {id: name} mapping or as a
# plain list; normalize the list form to the mapping form used below.
if isinstance(class_names, (list, tuple)):
    class_names = dict(enumerate(class_names))

print(f"Loaded {num_keypoints} keypoint names: {keypoint_names}")
print(f"Loaded class names: {class_names}")

# 3. Define COCO_KEYPOINT_CONNECTIONS for the horse skeleton
# This is a placeholder and can be refined based on domain knowledge.
# Connections are 0-indexed keypoint indices.
# NOTE(review): the official COCO person annotations use 1-based indices in
# `skeleton`; confirm which convention the downstream consumers expect.
COCO_KEYPOINT_CONNECTIONS = [
    # Head
    (0, 2), (1, 2), (3, 4), (5, 6), # L_Eye-Nose, R_Eye-Nose, L_EarBase-R_EarBase, L_EarTip-R_EarTip
    (3, 5), (4, 6), (2, 7), # Ear bases to tips, Nose to Throat
    # Body
    (7, 8), (8, 21), (21, 22), (22, 23), (23, 24), (24, 25), # Throat-Withers, Withers-TailBase, Spine
    # Front Left Leg
    (8, 9), (9, 11), (11, 13), # Withers-L_F_Elbow, L_F_Elbow-L_F_Knee, L_F_Knee-L_F_Paw
    # Front Right Leg
    (8, 10), (10, 12), (12, 14), # Withers-R_F_Elbow, R_F_Elbow-R_F_Knee, R_F_Knee-R_F_Paw
    # Back Left Leg
    (21, 15), (15, 17), (17, 19), # TailBase-L_B_Elbow, L_B_Elbow-L_B_Knee, L_B_Knee-L_B_Paw
    # Back Right Leg
    (21, 16), (16, 18), (18, 20) # TailBase-R_B_Elbow, R_B_Elbow-R_B_Knee, R_B_Knee-R_B_Paw
]

print(f"Defined {len(COCO_KEYPOINT_CONNECTIONS)} keypoint connections.")

# 4. Create the COCO categories list
# Only the first class is exported: the dataset is assumed to contain a single
# class (horse).
first_class_id, first_class_name = next(iter(class_names.items()))
categories = [
    {
        'id': first_class_id,
        'name': first_class_name,
        'supercategory': 'animal',
        'keypoints': keypoint_names,
        'skeleton': COCO_KEYPOINT_CONNECTIONS
    }
]

print(f"Created COCO categories for class '{categories[0]['name']}'.")

def convert_yolo_to_coco(split_name):
    """Convert one split of the YOLO-pose dataset into a COCO keypoint JSON file.

    Every image in ``images/<split_name>`` (below ``base_path``) is paired with
    the label file of the same stem in ``labels/<split_name>``. Normalized YOLO
    boxes and keypoints are scaled to pixels and written, together with the
    notebook-level ``categories``, to ``annotations_<split_name>.json`` inside
    ``output_dir``.

    Relies on the notebook-level names ``base_path``, ``output_dir``,
    ``categories`` and ``num_keypoints``.

    Args:
        split_name: Name of the split sub-directory, e.g. ``'train'`` or ``'val'``.

    Returns:
        None. The JSON file is written to disk and progress is printed.

    Raises:
        FileNotFoundError: If ``images/<split_name>`` does not exist.
        ValueError: If a label line contains a non-numeric value.
    """
    coco_data = {
        'images': [],
        'annotations': [],
        'categories': categories
    }

    image_id_counter = 0
    # Annotation ids start at 1: COCO evaluation stores the matched ground-truth
    # id per detection and treats 0 as "no match", so an annotation with id 0
    # can never be counted as a true positive.
    annotation_id_counter = 1

    images_dir = os.path.join(base_path, 'images', split_name)
    labels_dir = os.path.join(base_path, 'labels', split_name)

    image_extensions = ('.jpg', '.jpeg', '.png')
    # Case-insensitive match so files such as "IMG_01.JPG" are not skipped;
    # sorted for a deterministic image-id assignment.
    image_files = sorted(
        f for f in os.listdir(images_dir) if f.lower().endswith(image_extensions)
    )

    print(f"Processing {len(image_files)} images for {split_name} split...")

    for img_filename in image_files:
        img_path = os.path.join(images_dir, img_filename)
        # Swap only the real extension; chained str.replace would also rewrite
        # ".jpg"/".png" occurring elsewhere in the file name.
        label_filename = os.path.splitext(img_filename)[0] + '.txt'
        label_path = os.path.join(labels_dir, label_filename)

        try:
            with Image.open(img_path) as img:
                width, height = img.size
        except Exception as e:
            # Unreadable images are reported and left out of the output entirely.
            print(f"Error opening image {img_path}: {e}")
            continue

        coco_image = {
            'id': image_id_counter,
            'width': width,
            'height': height,
            'file_name': os.path.join('images', split_name, img_filename) # Path relative to dataset root
        }
        coco_data['images'].append(coco_image)

        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                for line_number, line in enumerate(f, start=1):
                    fields = line.split()
                    if not fields:
                        # Blank lines (e.g. a trailing newline) carry no annotation.
                        continue

                    parts = [float(value) for value in fields]
                    if len(parts) < 5:
                        print(f"Warning: Skipping malformed line {line_number} in {label_path}: expected at least 5 values, got {len(parts)}.")
                        continue

                    class_id_yolo = int(parts[0])
                    # YOLO bbox: class_id, center_x, center_y, bbox_width, bbox_height
                    yolo_center_x, yolo_center_y, yolo_bbox_width, yolo_bbox_height = parts[1:5]

                    # Convert YOLO bbox to COCO (x_min, y_min, width, height) in pixels
                    bbox_width_abs = yolo_bbox_width * width
                    bbox_height_abs = yolo_bbox_height * height
                    x_min = (yolo_center_x * width) - (bbox_width_abs / 2)
                    y_min = (yolo_center_y * height) - (bbox_height_abs / 2)
                    coco_bbox = [x_min, y_min, bbox_width_abs, bbox_height_abs]
                    area = bbox_width_abs * bbox_height_abs

                    # YOLO keypoints: x1, y1, v1, x2, y2, v2, ... (normalized)
                    yolo_kpts = parts[5:]

                    # Force exactly num_keypoints * 3 values (x, y, v per keypoint)
                    expected_kpt_len = num_keypoints * 3
                    if len(yolo_kpts) < expected_kpt_len:
                        print(f"Warning: Not enough keypoint data in {label_path} for image {img_filename}. Expected {expected_kpt_len}, got {len(yolo_kpts)}. Padding with 0s.")
                        # Pad with invisible keypoints if data is missing
                        yolo_kpts.extend([0.0] * (expected_kpt_len - len(yolo_kpts)))
                    elif len(yolo_kpts) > expected_kpt_len:
                        print(f"Warning: Too many keypoint data in {label_path} for image {img_filename}. Expected {expected_kpt_len}, got {len(yolo_kpts)}. Truncating.")
                        yolo_kpts = yolo_kpts[:expected_kpt_len]

                    coco_keypoints = []
                    for i in range(num_keypoints):
                        x_norm = yolo_kpts[i*3]
                        y_norm = yolo_kpts[i*3 + 1]
                        visibility = int(yolo_kpts[i*3 + 2])

                        # Convert normalized keypoints to absolute pixel values
                        coco_keypoints.append(x_norm * width)
                        coco_keypoints.append(y_norm * height)
                        coco_keypoints.append(visibility) # Visibility is 0 (not labeled), 1 (labeled but not visible), 2 (labeled and visible)

                    coco_annotation = {
                        'id': annotation_id_counter,
                        'image_id': image_id_counter,
                        'category_id': class_id_yolo, # Assuming YOLO class_id maps directly to COCO category id
                        'bbox': coco_bbox,
                        'area': area,
                        'iscrowd': 0,
                        'keypoints': coco_keypoints,
                        'num_keypoints': sum(1 for v in coco_keypoints[2::3] if v > 0), # Count labeled keypoints (visibility > 0)
                        'segmentation': [], # Not using segmentation for pose estimation
                    }
                    coco_data['annotations'].append(coco_annotation)
                    annotation_id_counter += 1
        else:
            # The image is still listed in the output, just without annotations.
            print(f"Warning: No label file found for {img_filename} at {label_path}")

        image_id_counter += 1

    output_json_path = os.path.join(output_dir, f'annotations_{split_name}.json')
    with open(output_json_path, 'w') as f:
        json.dump(coco_data, f, indent=4)
    print(f"Generated COCO annotations for {split_name} at {output_json_path}")

# 5. Convert the train split (only 'train' is converted here; the 'val' split is
# converted by a separate call later in the notebook)
print("Starting YOLO to COCO conversion...")
convert_yolo_to_coco('train')

print("YOLO to COCO conversion complete.")

Base directory: /content/drive/MyDrive/horse_pose_estimation/horse_26kp
Output directory for COCO JSONs: /content/drive/MyDrive/horse_pose_estimation/horse_26kp/coco_annotations
Loaded 26 keypoint names: ['L_Eye', 'R_Eye', 'Nose', 'L_EarBase', 'R_EarBase', 'L_EarTip', 'R_EarTip', 'Throat', 'Withers', 'L_F_Elbow', 'R_F_Elbow', 'L_F_Knee', 'R_F_Knee', 'L_F_Paw', 'R_F_Paw', 'L_B_Elbow', 'R_B_Elbow', 'L_B_Knee', 'R_B_Knee', 'L_B_Paw', 'R_B_Paw', 'TailBase', 'Spine1', 'Spine2', 'Spine3', 'TailTip']
Loaded class names: {0: 'horse'}
Defined 25 keypoint connections.
Created COCO categories for class 'horse'.
Starting YOLO to COCO conversion...
Processing 639 images for train split...
Generated COCO annotations for train at /content/drive/MyDrive/horse_pose_estimation/horse_26kp/coco_annotations/annotations_train.json
YOLO to COCO conversion complete.


In [None]:
# Convert the validation split; writes annotations_val.json into the COCO output directory.
convert_yolo_to_coco('val')

Processing 71 images for val split...
Generated COCO annotations for val at /content/drive/MyDrive/horse_pose_estimation/horse_26kp/coco_annotations/annotations_val.json


In [None]:
# Install OpenMIM and use it to install mmengine.
# NOTE(review): in the recorded run the `mim` command crashed on start-up with
# "module 'pkgutil' has no attribute 'ImpImporter'" (raised from pkg_resources
# under Python 3.12), so mmengine was not installed by this cell.
!pip install -U openmim
!mim install mmengine

Traceback (most recent call last):
  File "/usr/local/bin/mim", line 5, in <module>
    from mim.cli import cli
  File "/usr/local/lib/python3.12/dist-packages/mim/__init__.py", line 10, in <module>
    import setuptools  # noqa: F401
    ^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/setuptools/__init__.py", line 16, in <module>
    import setuptools.version
  File "/usr/local/lib/python3.12/dist-packages/setuptools/version.py", line 1, in <module>
    import pkg_resources
  File "/usr/local/lib/python3.12/dist-packages/pkg_resources/__init__.py", line 2172, in <module>
    register_finder(pkgutil.ImpImporter, find_on_path)
                    ^^^^^^^^^^^^^^^^^^^
AttributeError: module 'pkgutil' has no attribute 'ImpImporter'. Did you mean: 'zipimporter'?


In [None]:
# Install mmengine directly with pip (the `mim install mmengine` attempt above
# failed in the recorded run).
!pip install mmengine



In [None]:
from mmengine.config import Config
from mmengine.runner import Runner
from mmpose.utils import register_all_modules

# Register all MMPose modules
# NOTE(review): in the recorded run this cell failed with
# "AssertionError: MMCV==2.2.0 is used but incompatible"; an MMCV version
# >=2.0.0rc4 and <2.2.0 has to be installed first.
register_all_modules()



AssertionError: MMCV==2.2.0 is used but incompatible. Please install mmcv>=2.0.0rc4, <2.2.0.

In [None]:
# Dataset root on Drive; every other path in this config is derived from it.
DATA_ROOT = '/content/drive/MyDrive/horse_pose_estimation/horse_26kp'
TRAIN_ANN_FILE = f'{DATA_ROOT}/coco_annotations/annotations_train.json'
VAL_ANN_FILE = f'{DATA_ROOT}/coco_annotations/annotations_val.json'

# One codec shared by target generation (training) and heatmap decoding
# (validation), so both always agree on input/heatmap size.
codec = dict(type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# NOTE(review): `CocoDataset` comes with COCO's 17-keypoint meta information
# (flip pairs, sigmas, joint weights), while this dataset has 26 keypoints. A
# custom `metainfo` is presumably required for the datasets and `CocoMetric` --
# confirm against the MMPose custom-dataset documentation.
cfg = Config(dict(
    # Without this the Runner resolves type names in the 'mmengine' scope and
    # cannot find MMPose classes such as 'TopdownPoseEstimator'.
    default_scope='mmpose',

    model=dict(
        type='TopdownPoseEstimator',
        data_preprocessor=dict(
            type='PoseDataPreprocessor',
            mean=[123.675, 116.28, 103.53],
            std=[58.395, 57.12, 57.375],
            bgr_to_rgb=True),
        backbone=dict(
            type='ResNet',
            depth=50,
            init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
        head=dict(
            type='HeatmapHead',
            in_channels=2048,
            out_channels=26,
            deconv_out_channels=(256, 256, 256),
            deconv_kernel_sizes=(4, 4, 4),
            loss=dict(type='KeypointMSELoss', use_target_weight=True),
            # Needed to turn predicted heatmaps back into keypoint coordinates
            # during validation.
            decoder=codec)),

    train_dataloader=dict(
        batch_size=8,
        num_workers=2,
        persistent_workers=True,
        sampler=dict(type='DefaultSampler', shuffle=True),
        dataset=dict(
            type='CocoDataset',
            data_root=DATA_ROOT,
            ann_file=TRAIN_ANN_FILE,
            # `file_name` in the generated JSON is already relative to the
            # dataset root ("images/train/..."), so the image prefix must be the
            # root itself; pointing it at images/train would double the path.
            data_prefix=dict(img=DATA_ROOT),
            pipeline=[
                dict(type='LoadImage'),
                dict(type='GetBBoxCenterScale'),
                dict(type='RandomFlip', direction='horizontal'),
                dict(type='TopdownAffine', input_size=codec['input_size']),
                dict(type='GenerateTarget', encoder=codec),
                dict(type='PackPoseInputs')])),

    val_dataloader=dict(
        batch_size=8,
        num_workers=2,
        persistent_workers=True,
        sampler=dict(type='DefaultSampler', shuffle=False),
        dataset=dict(
            type='CocoDataset',
            data_root=DATA_ROOT,
            ann_file=VAL_ANN_FILE,
            # Same reasoning as for the training set: "images/val/..." is
            # already part of each `file_name`.
            data_prefix=dict(img=DATA_ROOT),
            pipeline=[
                dict(type='LoadImage'),
                dict(type='GetBBoxCenterScale'),
                dict(type='TopdownAffine', input_size=codec['input_size']),
                dict(type='PackPoseInputs')])),

    val_evaluator=dict(type='CocoMetric', ann_file=VAL_ANN_FILE),
    # test_evaluator=dict(type='CocoMetric', ann_file=VAL_ANN_FILE),

    # Validate every 10 epochs, matching the checkpoint interval below.
    train_cfg=dict(by_epoch=True, max_epochs=100, val_interval=10),
    val_cfg=dict(),

    optim_wrapper=dict(optimizer=dict(type='Adam', lr=0.001)),

    # Learning rate is multiplied by 0.1 at epochs 80 and 95.
    param_scheduler=[dict(type='MultiStepLR', by_epoch=True, milestones=[80, 95], gamma=0.1)],

    default_hooks=dict(
        checkpoint=dict(type='CheckpointHook', interval=10)),

    work_dir=f'{DATA_ROOT}/output_dirs/'
))

In [None]:
# Replace the installed setuptools with 69.5.1.
# NOTE(review): presumably meant to get past the pkg_resources/ImpImporter crash
# seen with `mim` -- confirm. The recorded pip output reports a conflict:
# openxlab 0.1.3 requires setuptools~=60.2.0.
!pip install setuptools==69.5.1

Collecting setuptools==69.5.1
  Using cached setuptools-69.5.1-py3-none-any.whl.metadata (6.2 kB)
Using cached setuptools-69.5.1-py3-none-any.whl (894 kB)
Installing collected packages: setuptools
  Attempting uninstall: setuptools
    Found existing installation: setuptools 60.2.0
    Uninstalling setuptools-60.2.0:
      Successfully uninstalled setuptools-60.2.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ipython 7.34.0 requires jedi>=0.16, which is not installed.
openxlab 0.1.3 requires setuptools~=60.2.0, but you have setuptools 69.5.1 which is incompatible.
pytensor 2.37.0 requires filelock>=3.15, but you have filelock 3.14.0 which is incompatible.[0m[31m
[0mSuccessfully installed setuptools-69.5.1


In [None]:
# Install full MMCV 2.1.0, which lies inside the range mmpose accepts
# (>=2.0.0rc4, <2.2.0).
# NOTE(review): the recorded output shows pip downloading the source tarball and
# starting a wheel build, and the training cell afterwards still reports
# MMCV==2.2.0. Presumably the build did not finish, the earlier mmcv-lite
# install is still being picked up, or the runtime was not restarted -- confirm.
!pip install mmcv==2.1.0

Collecting mmcv==2.1.0
  Downloading mmcv-2.1.0.tar.gz (471 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/471.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m471.0/471.4 kB[0m [31m17.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.4/471.4 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: mmcv


In [None]:
from mmengine.config import Config
from mmengine.runner import Runner
from mmpose.utils import register_all_modules

# Register all MMPose modules FIRST
register_all_modules()

# Dataset root on Drive; every other path in this config is derived from it.
DATA_ROOT = '/content/drive/MyDrive/horse_pose_estimation/horse_26kp'
TRAIN_ANN_FILE = f'{DATA_ROOT}/coco_annotations/annotations_train.json'
VAL_ANN_FILE = f'{DATA_ROOT}/coco_annotations/annotations_val.json'

# One codec shared by target generation (training) and heatmap decoding
# (validation), so both always agree on input/heatmap size.
codec = dict(type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# NOTE(review): `CocoDataset` comes with COCO's 17-keypoint meta information
# (flip pairs, sigmas, joint weights), while this dataset has 26 keypoints. A
# custom `metainfo` is presumably required for the datasets and `CocoMetric` --
# confirm against the MMPose custom-dataset documentation.

# Then create your config
cfg = Config(dict(
    # Without this the Runner resolves type names in the 'mmengine' scope and
    # cannot find MMPose classes such as 'TopdownPoseEstimator'.
    default_scope='mmpose',

    model=dict(
        type='TopdownPoseEstimator',
        data_preprocessor=dict(
            type='PoseDataPreprocessor',
            mean=[123.675, 116.28, 103.53],
            std=[58.395, 57.12, 57.375],
            bgr_to_rgb=True),
        backbone=dict(
            type='ResNet',
            depth=50,
            init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
        head=dict(
            type='HeatmapHead',
            in_channels=2048,
            out_channels=26,
            deconv_out_channels=(256, 256, 256),
            deconv_kernel_sizes=(4, 4, 4),
            loss=dict(type='KeypointMSELoss', use_target_weight=True),
            # Needed to turn predicted heatmaps back into keypoint coordinates
            # during validation.
            decoder=codec)),

    train_dataloader=dict(
        batch_size=8,
        num_workers=2,
        persistent_workers=True,
        sampler=dict(type='DefaultSampler', shuffle=True),
        dataset=dict(
            type='CocoDataset',
            data_root=DATA_ROOT,
            ann_file=TRAIN_ANN_FILE,
            # `file_name` in the generated JSON is already relative to the
            # dataset root ("images/train/..."), so the image prefix must be the
            # root itself; pointing it at images/train would double the path.
            data_prefix=dict(img=DATA_ROOT),
            pipeline=[
                dict(type='LoadImage'),
                dict(type='GetBBoxCenterScale'),
                dict(type='RandomFlip', direction='horizontal'),
                dict(type='TopdownAffine', input_size=codec['input_size']),
                dict(type='GenerateTarget', encoder=codec),
                dict(type='PackPoseInputs')])),

    val_dataloader=dict(
        batch_size=8,
        num_workers=2,
        persistent_workers=True,
        sampler=dict(type='DefaultSampler', shuffle=False),
        dataset=dict(
            type='CocoDataset',
            data_root=DATA_ROOT,
            ann_file=VAL_ANN_FILE,
            # Same reasoning as for the training set: "images/val/..." is
            # already part of each `file_name`.
            data_prefix=dict(img=DATA_ROOT),
            pipeline=[
                dict(type='LoadImage'),
                dict(type='GetBBoxCenterScale'),
                dict(type='TopdownAffine', input_size=codec['input_size']),
                dict(type='PackPoseInputs')])),

    val_evaluator=dict(type='CocoMetric', ann_file=VAL_ANN_FILE),

    # Validate every 10 epochs, matching the checkpoint interval below.
    train_cfg=dict(by_epoch=True, max_epochs=100, val_interval=10),
    val_cfg=dict(),

    optim_wrapper=dict(optimizer=dict(type='Adam', lr=0.001)),
    # Learning rate is multiplied by 0.1 at epochs 80 and 95.
    param_scheduler=[dict(type='MultiStepLR', by_epoch=True, milestones=[80, 95], gamma=0.1)],
    default_hooks=dict(checkpoint=dict(type='CheckpointHook', interval=10)),
    work_dir=f'{DATA_ROOT}/output_dirs/'
))

# Build the runner from the config and start training; checkpoints and logs go
# to `work_dir`.
runner = Runner.from_cfg(cfg)
runner.train()

AssertionError: MMCV==2.2.0 is used but incompatible. Please install mmcv>=2.0.0rc4, <2.2.0.