In [None]:
import json
import os

import h5py
import numpy as np


In [None]:
# Path to the SLEAP labels package inspected by this notebook.
# The default is a machine-specific absolute path; set the SLEAP_FILE
# environment variable to point at a different file without editing this cell.
sleap_file = os.environ.get("SLEAP_FILE", "D:/Project/Sleap-Models/BTR/labels.double.pkg.slp")

In [None]:
# Inspect a SLEAP labels file: list its datasets, parse the track table, and
# group every instance's points by track identifier and frame.
with h5py.File(sleap_file, "r") as h:
    # Materialize the key view so `keys` prints readably and stays usable after
    # the file is closed.
    keys = list(h.keys())
    print("Keys in the HDF5 file:", keys)

    # Load the full instances dataset to inspect its fields
    full_instances_data = h["instances"][:]
    print("\nInstances dataset fields (dtype names):", full_instances_data.dtype.names)
    print("First few entries of instances data:")
    for field_name in full_instances_data.dtype.names:
        print(f"  {field_name}: {full_instances_data[field_name][:10]}...")

    # Access the tracks_json dataset
    tracks_dataset = h["tracks_json"]
    print("\nTracks dataset object:", tracks_dataset)

    tracks_json_raw_data = tracks_dataset[:]
    # A file without any tracks has an empty dataset; indexing [0] would raise.
    if len(tracks_json_raw_data) > 0:
        print("Raw tracks_json data (first entry):", tracks_json_raw_data[0])
    else:
        print("Raw tracks_json data: <empty - no tracks defined>")
    print(f"Number of tracks entries: {len(tracks_json_raw_data)}")

    print("\nParsed Track Information:")
    # Map the *position* of each tracks_json entry (0, 1, ...) to the
    # identifier stored in that entry (e.g. "1", "2").
    track_index_to_identifier = {}
    display_identifiers = []  # Every identifier seen, used to pre-seed the result dict

    for i, track_bytes in enumerate(tracks_json_raw_data):
        try:
            # Entries are normally bytes, but tolerate already-decoded strings.
            if isinstance(track_bytes, bytes):
                track_string = track_bytes.decode('utf-8')
            else:
                track_string = str(track_bytes)
            track_list = json.loads(track_string)  # Each entry is a JSON list

            if isinstance(track_list, list) and len(track_list) >= 2:
                # instances['track'] is treated as the index i of this entry;
                # element 1 of the entry is used as the display identifier.
                track_identifier = track_list[1]
                track_index_to_identifier[i] = track_identifier
                display_identifiers.append(track_identifier)
                print(f"--- Track {i+1} ---")
                print(f"  Internal index (for instances['track']): {i}")
                print(f"  Track Identifier: {track_identifier}")
            else:
                print(f"Unexpected format for track {i}: {track_list}")

        except json.JSONDecodeError as e:
            print(f"Error decoding JSON for track {i}: {e}")
            print(f"Raw bytes causing error: {track_bytes}")
        except UnicodeDecodeError as e:
            print(f"Error decoding bytes as UTF-8 for track {i}: {e}")
            print(f"Raw bytes causing error: {track_bytes}")

    print(f"\nInternal Track Index to Display Identifier Mapping: {track_index_to_identifier}")

    # --- Map instances to tracks and retrieve their points ---
    print("\n--- Mapping Instances and Retrieving Points by Track ---")

    # Result layout: {track identifier: {frame_id: points array of that instance}}
    tracked_data_by_identity = {
        identifier: {} for identifier in display_identifiers
    }

    # NOTE(review): predicted instances (instance_type == 1) appear to index the
    # separate "pred_points" dataset rather than "points" in SLEAP's format -
    # confirm before using this cell on files that contain predictions.
    points_data = h["points"][:]  # Load the full points array once

    overwritten_entries = 0  # (track, frame) pairs that were assigned more than once

    for instance_entry in full_instances_data:
        # Plain ints keep the dict keys clean; they hash/compare equal to the
        # numpy scalars they came from, so lookups with either type still work.
        # NOTE(review): frame_id presumably refers to a row of the "frames"
        # dataset rather than the video frame index - TODO confirm.
        frame_id = int(instance_entry['frame_id'])
        internal_track_index = int(instance_entry['track'])
        point_id_start = instance_entry['point_id_start']
        point_id_end = instance_entry['point_id_end']

        # Instances whose track index has no tracks_json entry (e.g. untracked
        # instances) are grouped under a synthetic "Unknown_Track_<index>" key.
        display_track_identifier = track_index_to_identifier.get(
            internal_track_index, f"Unknown_Track_{internal_track_index}"
        )

        # Slice the points data using the start and end indices
        instance_points = points_data[point_id_start:point_id_end]

        # setdefault creates the bucket for identifiers that were not pre-seeded
        # (the fallback key above); direct indexing would raise KeyError.
        frames_for_track = tracked_data_by_identity.setdefault(display_track_identifier, {})
        if frame_id in frames_for_track:
            # Only one instance is kept per (track, frame); the later one wins.
            overwritten_entries += 1
        frames_for_track[frame_id] = instance_points

    if overwritten_entries:
        print(
            f"Note: {overwritten_entries} instance(s) shared a track and frame with an "
            "earlier instance; only the last one per (track, frame) was kept."
        )

    print("\n--- Example of Tracked Points by Identity and Frame ---")
    for identifier, frames_data in tracked_data_by_identity.items():
        print(f"\nTrack Identifier '{identifier}':")
        # Sort frames for consistent output
        sorted_frames = sorted(frames_data.keys())
        for frame in sorted_frames[:5]:
            coords = frames_data[frame]
            # An instance with an empty point range has no first node to show.
            first_node = coords[0] if len(coords) > 0 else None
            print(f"  Frame {frame}: shape={coords.shape}, first node={first_node}")
        if len(sorted_frames) > 5:
            print("  ...")
        print(f"  Total frames with data for Track '{identifier}': {len(frames_data)}")

    # You can now access all points for a specific track, for example, Track '1'
    # all_points_for_track_1 = tracked_data_by_identity['1']
    # You can then get points for a specific frame from that track:
    # points_for_track_1_frame_0 = all_points_for_track_1.get(0)
    # print(f"\nPoints for Track '1' in Frame 0:\n{points_for_track_1_frame_0}")