In [1]:
# rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])  # never commit API keys; revoke the key that was previously hardcoded here
# project = rf.workspace("project-rt7v1").project("-b107j")
# version = project.version(4)
# dataset = version.download("coco")
                

In [None]:
import os
import json
import torch
from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator

# ----------------------------------------------
# 1. Custom trainer with COCO evaluation
# ----------------------------------------------
class CustomTrainer(DefaultTrainer):
    """DefaultTrainer subclass that evaluates with a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name):
        """Build the evaluator used for ``dataset_name``.

        Args:
            cfg: detectron2 config; ``OUTPUT_DIR`` and
                ``TEST.KEYPOINT_OKS_SIGMAS`` are read from it.
            dataset_name: name of the registered dataset to evaluate on.

        Returns:
            A COCOEvaluator that writes its results to
            ``<cfg.OUTPUT_DIR>/evaluation``.
        """
        return COCOEvaluator(
            dataset_name=dataset_name,
            output_dir=os.path.join(cfg.OUTPUT_DIR, "evaluation"),
            use_fast_impl=False,
            # The evaluator does not read the config on its own. Without
            # explicit sigmas it uses the 17 COCO-person values, which do not
            # match a dataset with a different number of keypoints.
            kpt_oks_sigmas=cfg.TEST.KEYPOINT_OKS_SIGMAS,
        )

# ----------------------------------------------
# 2. Dataset loader (COCO keypoint JSON -> detectron2 dataset dicts)
# ----------------------------------------------
def load_keypoints_dataset(json_file):
    """Load a COCO-style keypoint annotation file as detectron2 dataset dicts.

    Exactly one record is produced per entry in ``images``; all annotations
    that reference an image are collected into that record's ``annotations``
    list. Images without annotations get an empty list.

    Args:
        json_file: path to the COCO JSON file. Image files are resolved
            relative to the directory that contains this file.

    Returns:
        list[dict]: records with the keys ``file_name``, ``width``,
        ``height``, ``image_id`` and ``annotations``.

    Raises:
        AssertionError: if the ``categories`` section is missing or empty.
        KeyError: if an annotation references an ``image_id`` that is not
            listed in ``images`` (or a required field is absent).
        FileNotFoundError: if a listed image file does not exist on disk.
    """
    with open(json_file, encoding="utf-8") as f:
        data = json.load(f)

    # Validate the annotation file structure.
    assert "categories" in data, "Categories section is missing in annotations!"
    assert len(data["categories"]) > 0, "No categories defined!"

    # Images live next to the annotation file.
    image_dir = os.path.dirname(json_file)
    image_dict = {img["id"]: img for img in data["images"]}

    # Group annotations by image so every image yields a single record.
    annotations_by_image = {}
    for ann in data["annotations"]:
        image_id = ann["image_id"]
        if image_id not in image_dict:
            raise KeyError(f"Annotation references unknown image_id: {image_id}")

        keypoints = ann["keypoints"]
        # Keypoints are flat (x, y, v) triplets; a keypoint counts as labeled
        # when its visibility flag v is > 0.
        labeled_keypoints = sum(1 for v in keypoints[2::3] if v > 0)

        annotations_by_image.setdefault(image_id, []).append({
            "bbox": ann["bbox"],
            "bbox_mode": BoxMode.XYWH_ABS,
            "keypoints": keypoints,
            "num_keypoints": ann.get("num_keypoints", labeled_keypoints),
            # Single-class dataset: every instance maps to class index 0.
            "category_id": 0,
            "iscrowd": ann.get("iscrowd", 0),
        })

    dataset_dicts = []
    for image_id, image_info in image_dict.items():
        file_name = os.path.join(image_dir, image_info["file_name"])

        # Fail early with a clear message instead of crashing in a worker.
        if not os.path.exists(file_name):
            raise FileNotFoundError(f"Image not found: {file_name}")

        dataset_dicts.append({
            "file_name": file_name,
            "width": image_info["width"],
            "height": image_info["height"],
            "image_id": image_id,
            "annotations": annotations_by_image.get(image_id, []),
        })

    return dataset_dicts

# ----------------------------------------------
# 3. Dataset registration and keypoint metadata
# ----------------------------------------------
# The dataset location can be overridden with the SKATER_DATASET_ROOT
# environment variable; the default keeps the original local path.
DATASET_ROOT = os.environ.get(
    "SKATER_DATASET_ROOT", "/home/ilya/Documents/PAC2/скелет-коньков-4"
)
TRAIN_JSON = os.path.join(DATASET_ROOT, "train", "_annotations.coco.json")
VAL_JSON = os.path.join(DATASET_ROOT, "valid", "_annotations.coco.json")

# Drop earlier registrations so this cell can be re-run in the same kernel.
for name in ["my_dataset_train", "my_dataset_val"]:
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    if name in MetadataCatalog.list():
        MetadataCatalog.remove(name)

DatasetCatalog.register("my_dataset_train", lambda: load_keypoints_dataset(TRAIN_JSON))
DatasetCatalog.register("my_dataset_val", lambda: load_keypoints_dataset(VAL_JSON))

KEYPOINT_METADATA = {
    "thing_classes": ["skater"],
    "keypoint_names": ["head", "left_shoulder", "right_shoulder", "pelvis"],
    # detectron2 expects pairs of keypoint *names* that swap under a
    # horizontal flip. Integer indices never match the names, so no swap was
    # applied at all. Only the shoulders are a left/right pair; head and
    # pelvis lie on the body axis and must map to themselves.
    "keypoint_flip_map": [("left_shoulder", "right_shoulder")],
    # NOTE(review): custom keys kept as-is; confirm whether anything reads
    # them (detectron2's Visualizer looks for `keypoint_connection_rules`).
    "keypoint_skeleton": [[0, 1], [1, 3], [2, 3]],
    "keypoint_sigmas": [0.5, 0.5, 0.5, 0.5],
    "evaluator_type": "coco"
}

for name in ["my_dataset_train", "my_dataset_val"]:
    MetadataCatalog.get(name).set(**KEYPOINT_METADATA)

# ----------------------------------------------
# 4. Model configuration
# ----------------------------------------------
cfg = get_cfg()

# Start from the COCO keypoint R-CNN baseline and its pretrained weights.
cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"))

cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.BACKBONE.FREEZE_AT = 2  # freeze the first backbone stages

# Keypoint head and evaluation settings for the 4-keypoint skeleton.
# The sigma list must have one entry per keypoint.
cfg.TEST.KEYPOINT_OKS_SIGMAS = [0.5] * 4
cfg.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead"
cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 4
cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14
cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 2
cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = [512] * 8

# Runtime, data and solver settings.
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ("my_dataset_val",)
cfg.DATALOADER.NUM_WORKERS = 4
cfg.SOLVER.IMS_PER_BATCH = 2  # reduced for stability
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.MAX_ITER = 500
cfg.SOLVER.STEPS = (100, 150)
# detectron2's default warm-up lasts 1000 iterations, i.e. longer than this
# whole 500-iteration run, so the learning rate would stay far below BASE_LR.
# Finish the warm-up before the first LR step instead.
cfg.SOLVER.WARMUP_ITERS = 50
cfg.TEST.EVAL_PERIOD = 50

# ----------------------------------------------
# 5. Sanity checks and training
# ----------------------------------------------

print("\nFinal Keypoint config check:")
print(f"NUM_KEYPOINTS: {cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS}")
print(f"OKS_SIGMAS length: {len(cfg.TEST.KEYPOINT_OKS_SIGMAS)}")
print(f"OKS_SIGMAS value: {cfg.TEST.KEYPOINT_OKS_SIGMAS}")


if __name__ == "__main__":
    # Verify that the registered metadata agrees with the model config before
    # spending time on building the model.
    metadata = MetadataCatalog.get("my_dataset_train")
    expected_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS
    if len(metadata.keypoint_names) != expected_keypoints:
        raise ValueError(
            f"Metadata defines {len(metadata.keypoint_names)} keypoints, "
            f"but the model is configured for {expected_keypoints}"
        )

    # The trainer writes logs, metrics and checkpoints into OUTPUT_DIR; create
    # it up front, as the official detectron2 tutorial does, so a fresh
    # checkout does not depend on a directory left over from earlier runs.
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    # Start training from the pretrained weights (no resume).
    trainer = CustomTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()


Final Keypoint config check:
NUM_KEYPOINTS: 4
OKS_SIGMAS length: 4
OKS_SIGMAS value: [0.5, 0.5, 0.5, 0.5]
[32m[04/05 00:59:08 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), pad

Skip loading parameter 'roi_heads.keypoint_head.score_lowres.weight' to the model due to incompatible shapes: (512, 17, 4, 4) in the checkpoint but (512, 4, 4, 4) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.keypoint_head.score_lowres.bias' to the model due to incompatible shapes: (17,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
[34mroi_heads.keypoint_head.score_lowres.{bias, weight}[0m


[32m[04/05 00:59:08 d2.engine.train_loop]: [0mStarting training from iteration 0


  keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
  keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
  keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
  keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[04/05 00:59:16 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|   skater   | 2            |
|            |              |[0m
[32m[04/05 00:59:16 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[04/05 00:59:16 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[04/05 00:59:16 d2.data.common]: [0mSerializing 2 elements to byte tensors and concatenating them all ...
[32m[04/05 00:59:16 d2.data.common]: [0mSerialized dataset takes 0.00 MiB
[32m[04/05 00:59:16 d2.evaluation.coco_evaluation]: [0mTrying to convert 'my_dataset_val' to COCO format ...
[32m[04/05 00:59:16 d2.evaluation.evaluator]: [0mStart inference on 2 batches
[32m[04/05 00:59:19 d2.evaluation.evaluator]: [0mTotal inference time: 0:00:01.53564

KeyboardInterrupt: 