In [None]:
# Conversion switches read by the driver cell at the end of this notebook.
# Set exactly one of them to True to run the matching converter; with any
# other combination only the csv -> h5 conversion is run.
sleap2dlc = False  # convert a SLEAP .pkg.slp project
l3d2dlc = True  # convert a Label3D .mat export

In [None]:
#
# DeepLabCut Toolbox (deeplabcut.org)
# © A. & M.W. Mathis Labs
# https://github.com/DeepLabCut/DeepLabCut
#
# Please see AUTHORS for contributors.
# https://github.com/DeepLabCut/DeepLabCut/blob/master/AUTHORS
#
# Licensed under GNU Lesser General Public License v3.0
#
"""
DeepLabCut2.0 Toolbox (deeplabcut.org)
© A. & M. Mathis Labs
https://github.com/DeepLabCut/DeepLabCut
Please see AUTHORS for contributors.

https://github.com/DeepLabCut/DeepLabCut/blob/master/AUTHORS
Licensed under GNU Lesser General Public License v3.0
"""
import io
import json
import os
import time
from itertools import islice
from pathlib import Path

import PIL.Image as Image
import cv2
import h5py
import numpy as np
import pandas as pd
import yaml

from deeplabcut.utils import auxiliaryfunctions

# Tuple of file-type names; not referenced by any code visible in this notebook.
SUPPORTED_FILETYPES = "csv", "nwb"

def convertcsv2h5(config, scorer=None):
    """
    Rewrite the csv annotation files under labeled-data as h5 (and csv) files.

    Lets the user hand-edit a csv (e.g. to fix the scorer name) and then
    regenerate the matching hdf file from it.
    WARNING: conversion might corrupt the data.

    config : string
        Full path of the config.yaml file as a string.

    scorer: string, optional
        When given, this name replaces the scorer/annotator in every csv and
        hdf file that is rewritten. Otherwise the scorer from the config is used.

    Examples
    --------
    Convert csv annotation files for reaching-task project into hdf.
    >>> deeplabcut.convertcsv2h5('/analysis/project/reaching-task/config.yaml')

    --------
    Same, but set the scorer/annotator in all annotation files to Albert.
    >>> deeplabcut.convertcsv2h5('/analysis/project/reaching-task/config.yaml',scorer='Albert')
    --------
    """
    cfg = auxiliaryfunctions.read_config(config)
    stems = [Path(video).stem for video in cfg["video_sets"].keys()]
    labeled_root = Path(config).parent / "labeled-data"
    folders = [labeled_root / Path(stem) for stem in stems]
    scorer = scorer or cfg["scorer"]

    for folder in folders:
        # The file on disk is always named after the scorer stored in the config.
        csv_path = os.path.join(
            str(folder), "CollectedData_" + cfg["scorer"] + ".csv"
        )
        try:
            # Peek at the first lines only, so the layout can be detected
            # without loading the whole file.
            with open(csv_path) as datafile:
                first_lines = list(islice(datafile, 0, 5))

            # Multi-animal files carry an extra 'individuals' header row.
            n_header_rows = 4 if "individuals" in first_lines[1] else 3
            header = list(range(n_header_rows))

            # Newer files spread the image path over three index columns.
            if first_lines[-1].split(",")[0] == "labeled-data":
                index_col = [0, 1, 2]
            else:
                index_col = 0

            data = pd.read_csv(csv_path, index_col=index_col, header=header)
            data.columns = data.columns.set_levels([scorer], level="scorer")
            guarantee_multiindex_rows(data)
            h5_path = csv_path.replace(".csv", ".h5")
            data.to_hdf(h5_path, key="df_with_missing", mode="w")
            data.to_csv(csv_path)
        except FileNotFoundError:
            print("Attention:", folder, "does not appear to have labeled data!")

def guarantee_multiindex_rows(df):
    """
    Normalise, in place, the row index of an annotation DataFrame.

    A flat index of path strings is split on its separator ("/" if the first
    entry contains one, "\\" otherwise) and replaced by a MultiIndex with one
    level per path component. Afterwards the second index level, when there
    is one, is cast to strings.

    Empty frames, numerical indices and indices without a second level are
    left untouched instead of raising.

    df : pandas.DataFrame
        Frame whose ``index`` is modified in place. Nothing is returned.
    """
    # Make paths platform-agnostic if they are not already.
    # The length check avoids an IndexError on df.index[0] for empty frames.
    if not isinstance(df.index, pd.MultiIndex) and len(df.index) > 0:  # Backwards compatibility
        path = df.index[0]
        try:
            sep = "/" if "/" in path else "\\"
            splits = tuple(df.index.str.split(sep))
            df.index = pd.MultiIndex.from_tuples(splits)
        except TypeError:  # Ignore numerical index of frame indices
            pass

    # Ensure folder names are strings
    try:
        df.index = df.index.set_levels(df.index.levels[1].astype(str), level=1)
    except (AttributeError, IndexError):
        # AttributeError: the index is not a MultiIndex (no ``levels``).
        # IndexError: the MultiIndex has a single level, so there is no level 1.
        pass

def convert_sleap_to_deeplabcut(
    sleap_file: str | Path,
    deeplabcut_dir: str,
    scorer: str = "me",
    multianimal: bool = False,
    max_instances: int = 2
):
    """
    Convert a SLEAP project into a DeepLabCut project
    WARNING: Conversion might corrupt the data.

    For every video group found in the .slp file the embedded frames are
    written as PNG images to ``labeled-data/<video>/`` and the labels to
    ``labeled-data/<video>/CollectedData_<scorer>.csv``. A ``config.yaml`` built
    from the defaults below plus the skeleton/bodyparts read from the file is
    written to ``deeplabcut_dir``.

    Once conversion is complete, check the generated config.yaml against the
    keypoint names in the generated .csv files.
    Finally, run deeplabcut.convertcsv2h5('config.yaml')

    Args:
        sleap_file : string | Path
            Path to the .slp file.
        deeplabcut_dir: string
            Output directory.
        scorer: string, optional
            Name of scorer.
        multianimal: boolean, optional
            If True, the output will be formatted for a maDLC project.
            If False, a single individual is written and ``max_instances``
            is ignored.
        max_instances: int, optional
            Maximum number of instances (animals) to include per frame.
            This will define the number of individuals expected in the DLC output
    """
    # Accept both str and Path: the string methods below need a plain string.
    sleap_path = os.fspath(sleap_file)
    if "labels." in sleap_path:
        task_suffix = sleap_path.split("labels.")[1].rsplit(".pkg.slp", 1)[0]
    else:
        # Fall back to the file name when it does not follow 'labels.<name>.pkg.slp'.
        task_suffix = Path(sleap_path).name.rsplit(".pkg.slp", 1)[0]

    config = {
        "scorer": scorer,
        "multianimalproject": multianimal,
        "date": time.strftime("%Y-%m-%d"),
        "Task": f"{scorer}-{task_suffix}",
        "project_path": deeplabcut_dir,
        "engine": "pytorch",
        "video_sets": {},
        "start": 0,
        "stop": 1,
        "numframes2pick": 20,
        "skeleton_color": "white",
        "pcutoff": 0.6,
        "dotsize": 12,
        "alphavalue": 0.6,
        "colormap": "rainbow",
        "TrainingFraction": [0.95],
        "iteration": 0,
        "default_net_type": "resnet_50",
        "default_augmenter": "default",
        "snapshotindex": -1,
        "detector_snapshotindex": -1,
        "batch_size": 8,
        "detector_batch_size": 1,
        "cropping": False,
        "x1": 0,
        "x2": 640,
        "y1": 277,
        "y2": 624,
        "corner2move2": [50, 50],
        "move2corner": True,
        "SuperAnimalConversionTables": None,
    }

    # create directories (also when the project directory already exists
    # but one of its sub-directories is missing)
    os.makedirs(deeplabcut_dir, exist_ok=True)
    os.makedirs(os.path.join(deeplabcut_dir, "labeled-data"), exist_ok=True)
    os.makedirs(os.path.join(deeplabcut_dir, "videos"), exist_ok=True)

    # parse .slp file
    with h5py.File(sleap_path, "r") as hdf_file:
        # Identify video names
        video_names = {}
        for video_group_name in hdf_file.keys():
            if video_group_name.startswith("video"):
                source_video_path = f"{video_group_name}/source_video"
                if source_video_path in hdf_file:
                    source_video_json = hdf_file[source_video_path].attrs["json"]
                    source_video_dict = json.loads(source_video_json)
                    video_filename = source_video_dict["backend"]["filename"]
                    video_names[video_group_name] = video_filename

        # Read sleap track data
        track_index_to_identifier = {}
        display_individuals = []
        if "tracks_json" in hdf_file:
            tracks_json_raw = hdf_file["tracks_json"][:]
            for i, track_bytes in enumerate(tracks_json_raw):
                track_list = json.loads(track_bytes.decode('utf-8'))

                # The second element of each entry is used as the track name.
                if isinstance(track_list, list) and len(track_list) >= 2:
                    track_identifier = str(track_list[1])  # Ensure it's a string, e.g., "1", "2", "Mouse1"
                    track_index_to_identifier[i] = track_identifier
                    display_individuals.append(track_identifier)
        else:
            print("Warning: 'tracks_json' dataset not found in SLEAP file.")

        # Handle track data according to multianimal and max_instance setting
        if not multianimal:
            display_individuals = ["1"]  # Default single animal identity
            track_index_to_identifier = {0: "1"}  # Map SLEAP's first track (index 0) to "1"
            max_instances = 1  # Force max_instances to 1 for single animal mode
        elif len(display_individuals) < max_instances:
            # tracks_json did not provide enough identities (or is missing):
            # generate default ones up to max_instances.
            for i in range(len(display_individuals), max_instances):
                default_name = str(i + 1)
                display_individuals.append(default_name)
                # Also update mapping for these default individuals
                if i not in track_index_to_identifier:
                    track_index_to_identifier[i] = default_name
        elif len(display_individuals) > max_instances:
            # More tracks were detected than max_instances: truncate and
            # rebuild the mapping to match the truncated list.
            display_individuals = display_individuals[:max_instances]
            track_index_to_identifier = {
                i: display_individuals[i] for i in range(max_instances)
            }

        print(f"Identified individuals for DLC: {display_individuals}")
        print(f"Internal track index to display identifier mapping: {track_index_to_identifier}")

        # Extract and save images for each video
        for video_group, video_filename in video_names.items():
            video_file_name = os.path.basename(video_filename)
            video_base_name = video_file_name.split(".")[0]
            output_dir = os.path.join(deeplabcut_dir, "labeled-data", video_base_name)
            os.makedirs(output_dir, exist_ok=True)

            # extract labeled frames and save them in a separate directory for each video
            if video_group in hdf_file and "video" in hdf_file[video_group]:
                video_data = hdf_file[f"{video_group}/video"][:]
                frame_numbers = hdf_file[f"{video_group}/frame_numbers"][:]
                for i, (img_bytes, frame_number) in enumerate(
                    zip(video_data, frame_numbers)
                ):
                    img = Image.open(io.BytesIO(np.array(img_bytes, dtype=np.uint8)))
                    img = np.array(img)
                    if i == 0:
                        # The first frame fixes the crop entry for this video.
                        # basename (rather than split("/")) matches how the
                        # labeled-data folder name is derived above.
                        video_path = os.path.join(
                            deeplabcut_dir, "videos", video_file_name
                        )
                        config["video_sets"][video_path] = {
                            "crop": f"0, {img.shape[1]}, 0, {img.shape[0]}"
                        }
                    frame_name = f"img{str(frame_number).zfill(8)}.png"
                    # NOTE(review): the array comes from PIL and is written
                    # with OpenCV without channel reordering; confirm the
                    # colours of multi-channel frames are as expected.
                    cv2.imwrite(f"{output_dir}/{frame_name}", img)
                    print(f"Saved frame {frame_number} as {frame_name}")

            # extract coordinates and save them in a separate directory for each video
            if video_group in hdf_file and "frames" in hdf_file:
                video_index = int(video_group.replace("video", ""))
                frame_references = {
                    frame["frame_id"]: frame["frame_idx"]
                    for frame in hdf_file["frames"]
                    if frame["video"] == video_index
                }

                # Extract instances and points
                points_dataset = hdf_file["points"]
                instances_dataset = hdf_file["instances"]

                # Group the instance records as {frame_id: {track: record}}.
                # A later record with the same frame and track replaces an
                # earlier one.
                instances_paired = {}
                for instance_entry in instances_dataset:
                    frame_id = instance_entry['frame_id']
                    track_id = instance_entry['track']
                    instances_paired.setdefault(frame_id, {})[track_id] = instance_entry

                data = []

                # parse data
                metadata_json = hdf_file["metadata"].attrs["json"]
                metadata_dict = json.loads(metadata_json)
                nodes = metadata_dict["nodes"]
                links = metadata_dict["skeletons"][0]["links"]

                keypoints = [node["name"] for node in nodes]
                skeleton = [
                    [keypoints[l["source"]], keypoints[l["target"]]] for l in links
                ]
                config["skeleton"] = skeleton

                # Keypoint names in the order given by the first skeleton's nodes.
                keypoints_ids = [
                    n["id"] for n in metadata_dict["skeletons"][0]["nodes"]
                ]
                keypoints_ordered = [keypoints[idx] for idx in keypoints_ids]

                # Iterate through the frames
                for frame_id, frame_idx in frame_references.items():
                    current_frame_instances = instances_paired.get(frame_id, {})

                    # Every exported track starts with all keypoints missing.
                    frame_keypoint_track = {
                        idx: {kp: (np.nan, np.nan) for kp in keypoints_ordered}
                        for idx in track_index_to_identifier
                    }

                    for internal_track_index, instance_info in current_frame_instances.items():
                        # NOTE(review): instances whose track value is not a
                        # key of track_index_to_identifier are skipped; confirm
                        # this matches how the file encodes untracked instances.
                        if internal_track_index not in track_index_to_identifier:
                            continue

                        point_id_start = instance_info["point_id_start"]
                        point_id_end = instance_info["point_id_end"]
                        instance_points = points_dataset[point_id_start:point_id_end]

                        for node_idx, kp in enumerate(instance_points):
                            x, y, vis = kp["x"], kp["y"], kp["visible"]
                            node_name = keypoints_ordered[node_idx]
                            # Invisible or NaN points are stored as missing.
                            if np.isnan(x) or np.isnan(y) or not vis:
                                x, y = None, None
                            frame_keypoint_track[internal_track_index][node_name] = (x, y)

                    # Flatten to x, y pairs: tracks in sorted order, keypoints
                    # in skeleton order within each track.
                    keypoints_flat = []
                    for internal_track_idx in sorted(track_index_to_identifier):
                        track_kp_data = frame_keypoint_track.get(internal_track_idx)
                        if track_kp_data:
                            for kp in keypoints_ordered:
                                x, y = track_kp_data.get(kp, (np.nan, np.nan))
                                keypoints_flat.extend([x, y])
                        else:
                            for _ in keypoints_ordered:
                                keypoints_flat.extend([np.nan, np.nan])

                    data.append([frame_idx] + keypoints_flat)

                # Construct DLC Header Rows
                individuals_row = ["individuals"]
                bodyparts_row = ["bodyparts"]
                columns = ["frame"]
                coords_row = ["coords"]

                num_keypoints = len(keypoints_ordered)
                num_individuals = len(display_individuals) if multianimal else 1

                # Per-individual pieces of the header. The column names are
                # placeholders: they only size the frame and are dropped
                # before writing.
                column_names = (
                    [f"{kp}_x" for kp in keypoints_ordered]
                    + [f"{kp}_y" for kp in keypoints_ordered]
                )
                bodypart_names = [f"{kp}" for kp in keypoints_ordered for _ in (0, 1)]
                coord_names = ["x", "y"] * num_keypoints

                if not multianimal:
                    config["bodyparts"] = keypoints_ordered
                    config["individuals"] = None
                    columns += column_names
                    bodyparts_row += bodypart_names
                    coords_row += coord_names
                else:
                    config["bodyparts"] = "MULTI!"
                    config["multianimalbodyparts"] = keypoints_ordered
                    config["uniquebodyparts"] = []
                    individuals = [str(k) for k in range(1, max_instances + 1)]
                    config["individuals"] = individuals
                    config["identity"] = False
                    for m in range(num_individuals):
                        columns += column_names
                        bodyparts_row += bodypart_names
                        coords_row += coord_names
                        individuals_row += [individuals[m]] * (num_keypoints * 2)

                scorer_row = ["scorer"] + [f"{scorer}"] * (len(columns) - 1)

                labels_df = pd.DataFrame(data, columns=columns)
                labels_df["frame"] = labels_df["frame"].apply(
                    lambda x: (
                        f"labeled-data/{video_base_name}/"
                        f"img{str(int(x)).zfill(8)}.png"
                    )
                )
                # Collapse rows that refer to the same image into one row.
                labels_df = labels_df.groupby("frame", as_index=False).first()

                # The 'individuals' header row is only written for multi-animal output.
                header_rows = [scorer_row]
                if multianimal:
                    header_rows.append(individuals_row)
                header_rows += [bodyparts_row, coords_row]
                header_df = pd.DataFrame(header_rows, columns=labels_df.columns)

                final_df = pd.concat([header_df, labels_df], ignore_index=True)
                final_df.columns = [None] * len(final_df.columns)  # Set header to None

                # Save concatenated labels
                final_df.to_csv(
                    os.path.join(output_dir, f"CollectedData_{scorer}.csv"),
                    index=False,
                    header=None,
                )

    with open(os.path.join(deeplabcut_dir, "config.yaml"), "w") as outfile:
        yaml.dump(config, outfile, default_flow_style=False)


In [None]:
import scipy.io as sio
import shutil
import subprocess

def convert_Label3D_to_deeplabcut(
    Label3D_file: str | Path,
    skeleton_file: str | Path,
    deeplabcut_dir: str,
    scorer: str = "me",
    COM: bool = False,
):
    """
    Convert a Label3D label into a DeepLabCut project
    WARNING: Conversion might corrupt the data.

    For every camera in the file's ``labelData`` this writes the 2D labels to
    ``labeled-data/<name>_cam<k>/CollectedData_<scorer>.csv``, extracts the
    labelled frames with ffmpeg from ``Videos/Camera<k>/0.mp4`` next to the
    .mat file, and copies that video into ``videos/``. After all cameras are
    processed, ``config.yaml`` is written; if one already exists it is backed
    up to ``config_bak.yaml`` and only its ``video_sets`` are extended.

    Once conversion is complete, run deeplabcut.convertcsv2h5('config.yaml')

    Args:
        Label3D_file : string | Path
            Path to the exported Label3D_dannce.mat file.
        skeleton_file : string | Path
            Label3D skeleton used during label
        deeplabcut_dir: string
            Output directory.
        scorer: string, optional
            Name of scorer.
        COM: boolean, optional
            If True, the bodyparts are fixed to ["com", "com2"] and no
            skeleton links are read from ``skeleton_file``.
    """
    # Path(...).name works for str and Path input (and for both separators
    # on Windows), unlike a manual split on "/".
    task_suffix = Path(Label3D_file).name.rsplit(".mat", 1)[0]

    config = {
        "scorer": scorer,
        "multianimalproject": False,
        "date": time.strftime("%Y-%m-%d"),
        "Task": f"{scorer}-{task_suffix}",
        "identity": None,
        "project_path": deeplabcut_dir,
        "engine": "pytorch",
        "video_sets": {},
        "start": 0,
        "stop": 1,
        "numframes2pick": 20,
        "skeleton_color": "white",
        "pcutoff": 0.6,
        "dotsize": 12,
        "alphavalue": 0.6,
        "colormap": "rainbow",
        "TrainingFraction": [0.95],
        "iteration": 0,
        "default_net_type": "resnet_50",
        "default_augmenter": "default",
        "snapshotindex": -1,
        "detector_snapshotindex": -1,
        "batch_size": 8,
        "detector_batch_size": 1,
        "cropping": False,
        "x1": 224,
        "x2": 736,
        "y1": 14,
        "y2": 526,
        "corner2move2": [50, 50],
        "move2corner": True,
        "SuperAnimalConversionTables": None,
    }

    # create directories (also when the project directory already exists
    # but one of its sub-directories is missing)
    os.makedirs(deeplabcut_dir, exist_ok=True)
    os.makedirs(os.path.join(deeplabcut_dir, "labeled-data"), exist_ok=True)
    videos_dir = os.path.join(deeplabcut_dir, "videos")
    os.makedirs(videos_dir, exist_ok=True)

    # parse .mat file
    label_data = sio.loadmat(os.fspath(Label3D_file))
    skeleton_data = sio.loadmat(os.fspath(skeleton_file))

    # Extract bodyparts and skeletons from skeleton file
    skeleton = []
    if COM:
        bodyparts = ["com", "com2"]
    else:
        bodyparts = [str(bp[0]) for bp in skeleton_data['joint_names'][0]]
        # joints_idx rows hold 1-based indices into joint_names.
        for link in skeleton_data["joints_idx"]:
            skeleton.append([bodyparts[link[0] - 1], bodyparts[link[1] - 1]])

    config["skeleton"] = skeleton
    config["bodyparts"] = bodyparts

    print(f"Extracted body parts: {bodyparts}")
    print(skeleton)

    # Access labelData: one struct per camera view, taken from its first column
    label_data_cells = label_data['labelData'][:, 0]

    base_video_name = Path(Label3D_file).stem.replace('_dannce', '')
    video_base_dir = os.path.dirname(Label3D_file)
    num_bodyparts = len(bodyparts)
    columns = pd.MultiIndex.from_product(
        [[scorer], bodyparts, ['x', 'y']],
        names=['scorer', 'bodyparts', 'coords']
    )

    # Iterate through each camera view
    for cam_idx, cam_struct in enumerate(label_data_cells):
        # Extract 2D points for the current camera view
        data_2d = cam_struct['data_2d'][0, 0]
        data_sampleID = cam_struct['data_sampleID'][0, 0]

        if data_2d.size == 0 or data_sampleID.size == 0:
            print(f"Camera {cam_idx}'s data_2d or data_sampleID is empty! Skipping camera {cam_idx}.")
            continue

        num_frames = data_2d.shape[0]
        sample_ids = data_sampleID.flatten()

        # Reshape data_2d to (num_frames, num_bodyparts, 2) for x, y coordinates;
        # this also fails loudly if the skeleton does not match the labels.
        points_2d = data_2d.reshape(num_frames, num_bodyparts, 2)

        # Generate unique video name for each camera view
        video_name = f"{base_video_name}_cam{cam_idx + 1}"
        video_dest = os.path.join(videos_dir, f"{video_name}.mp4")
        # The crop entry uses the "x1, x2, y1, y2" string form, like the
        # SLEAP converter in this file.
        # NOTE(review): the 640x480 frame size is hard-coded; confirm it
        # matches the actual videos.
        config["video_sets"][video_dest] = {"crop": "0, 640, 0, 480"}

        # Generate image paths for the index
        image_paths = [
            f"labeled-data/{video_name}/img{str(int(sampleID)).zfill(8)}.png"
            for sampleID in sample_ids
        ]

        # Build the DLC frame in one step: flattening the last two axes gives
        # (bodypart, x/y) column order, which matches `columns`. Rows are
        # matched to sample IDs by position, so repeated IDs keep their own data.
        data_df = pd.DataFrame(
            points_2d[: len(sample_ids)].reshape(-1, num_bodyparts * 2),
            index=image_paths,
            columns=columns,
            dtype=float,
        )

        # Extract frames using FFmpeg
        input_video_path = os.path.join(video_base_dir, "Videos", f"Camera{cam_idx + 1}", "0.mp4")
        output_labeled_data_dir = os.path.join(deeplabcut_dir, "labeled-data", video_name)
        os.makedirs(output_labeled_data_dir, exist_ok=True)  # Ensure directory exists
        print(f"Extracting frames for {video_name} from {input_video_path}")

        # Iterate through sampleIDs and extract corresponding frames
        for frame_idx, sampleID in enumerate(sample_ids):
            # Construct the full path for the output image
            full_output_image_path = os.path.join(deeplabcut_dir, image_paths[frame_idx])

            if os.path.isfile(full_output_image_path):
                print(f"Frame {frame_idx} already in the {deeplabcut_dir}, skipping...")
                continue

            # Argument list instead of a shell string: no quoting issues with
            # paths. The backslash keeps the comma from being parsed as a
            # filter separator by ffmpeg.
            ffmpeg_command = [
                "ffmpeg", "-y",
                "-i", input_video_path,
                "-vf", f"select=eq(n\\,{int(sampleID)})",
                "-vframes", "1",
                full_output_image_path,
            ]
            try:
                subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
                print(f"Successfully extracted frame {sampleID}.")
            except subprocess.CalledProcessError as e:
                print(f"Error extracting frame {sampleID}: {e}")
                print(f"Stderr: {e.stderr}")
                print(f"Stdout: {e.stdout}")
            except FileNotFoundError as e:
                # ffmpeg executable not found; keep going like other failures.
                print(f"Error extracting frame {sampleID}: {e}")

        # Save the DataFrame to CSV for the current camera view
        csv_output_path = os.path.join(output_labeled_data_dir, f"CollectedData_{scorer}.csv")
        data_df.to_csv(csv_output_path, index=True, header=True)
        print(f"Labeled data for {video_name} saved to: {csv_output_path}")

        # Copy videos to DLC project
        if os.path.isfile(video_dest):
            print(f"{video_name}.mp4 already in the DLC folder, skipping...")
        else:
            print(f"Copying {video_name}.mp4 to DLC folder...")
            # Copy straight to the final name so an unrelated 0.mp4 in the
            # videos folder is never overwritten or renamed.
            shutil.copy(input_video_path, video_dest)

    # Save config.yaml once, after all video_sets are populated
    config_path = os.path.join(deeplabcut_dir, "config.yaml")
    if os.path.isfile(config_path):
        config_backup = os.path.join(deeplabcut_dir, "config_bak.yaml")
        print("Config file already existed, renamed the original config as config_bak.yaml")
        shutil.copy(config_path, config_backup)
        with open(config_path, 'r') as f:
            config_org = yaml.load(f, Loader=yaml.SafeLoader) or {}
        # Only video_sets is merged; every other existing setting is kept.
        if not config_org.get("video_sets"):
            config_org["video_sets"] = {}
        config_org["video_sets"].update(config["video_sets"])
        print("Appended new video_sets to the originals.")
        with open(config_path, 'w') as file:
            yaml.dump(config_org, file, default_flow_style=False, sort_keys=False)
        print(f"DeepLabCut config in {config_path} has been updated.")
    else:
        with open(config_path, 'w') as f:
            yaml.dump(config, f, sort_keys=False)
        print(f"DeepLabCut config saved to: {config_path}")
    print("Conversion complete.")

In [None]:
# Driver: run the converter selected by the sleap2dlc / l3d2dlc flags, then
# turn the csv annotations of the resulting project into h5 files.
# Windows paths are raw strings so their backslashes are never read as
# escape sequences.
if sleap2dlc and not l3d2dlc:
    sleapfile = r'D:\Project\Sleap-Models\BTR\labels.double.pkg.slp'
    dlcdir = r'D:\Project\DLC-Models\Double'
    convert_sleap_to_deeplabcut(sleapfile, dlcdir, "bezver", multianimal=True)

elif l3d2dlc and not sleap2dlc:
    Label3D_file = "D:/Project/SDANNCE-Models/4CAM-250620/SD-20250705-MULTI/SD-20250705-MULTI-COM3D.mat"
    skeleton_file = "D:/Repository/Label3D/skeletons/com-multi.mat"
    dlcdir = r'D:\Project\DLC-Models\COM3D'
    convert_Label3D_to_deeplabcut(Label3D_file, skeleton_file, dlcdir, "bezver", COM=True)

else:
    # If neither (or both) flags are set, just convert csv2h5
    dlcdir = r'D:\Project\DLC-Models\COM3D'

# Every branch ends with the same csv -> h5 conversion of the chosen project.
configfile = os.path.join(dlcdir, "config.yaml")
convertcsv2h5(configfile)
