# Ingest FHIBE

## Imports

In [None]:
import json
from collections import defaultdict
from pathlib import Path

import numpy as np
import pandas as pd
import tlc
from tlc.core.helpers._value_map_builder import _ValueMapBuilder

## Project setup

In [None]:
# Names passed to tlc.TableWriter when the table is written at the end of the notebook.
PROJECT_NAME = "3LC Tutorials - FHIBE"
DATASET_NAME = "FHIBE"
TABLE_NAME = "full"
# Row limit checked by the CSV ingestion loop in the "Load data" section.
MAX_SAMPLES = 1_000_000

In [None]:
# Root directory of the FHIBE download. The data/metadata paths below and the
# per-row image paths are all built relative to this directory, so point it at
# the local copy of the dataset before running the notebook.
FHIBE_ROOT = Path(
    "D:/Data/fhibe.20250716.u.gT5_rFTA_downsampled_public_raw_only/fhibe.20250716.u.gT5_rFTA_downsampled_public_raw_only"
)

In [None]:
# Locations inside FHIBE_ROOT: the raw data folder, the processed metadata
# folder, and the CSV file that is read into a DataFrame in the next cell.
DATA_ROOT = FHIBE_ROOT / "data/raw/fhibe_downsampled"
METADATA_ROOT = FHIBE_ROOT / "data/processed/"
CSV_FILE = METADATA_ROOT / "fhibe_downsampled" / "fhibe_downsampled.csv"

In [None]:
# Read the CSV into a DataFrame; the "Load data" section iterates over its rows.
csv_file = pd.read_csv(CSV_FILE)
# Preview the first rows as the cell output.
csv_file.head()

## Define annotation transforms

In [None]:
# Number of keypoint rows allocated per instance; equals the number of entries in KPTS.
NUM_KEYPOINTS = 33

# Keypoint keys in index order, each formatted "<index>. <name>".
# transform_keypoints looks these exact strings up in the parsed annotation, and
# the table schema uses the part after ". " as the keypoint attribute name.
KPTS = [
    "0. Nose",
    "1. Right eye inner",
    "2. Right eye",
    "3. Right eye outer",
    "4. Left eye inner",
    "5. Left eye",
    "6. Left eye outer",
    "7. Right ear",
    "8. Left ear",
    "9. Mouth right",
    "10. Mouth left",
    "11. Right shoulder",
    "12. Left shoulder",
    "13. Right elbow",
    "14. Left elbow",
    "15. Right wrist",
    "16. Left wrist",
    "17. Right pinky knuckle",
    "18. Left pinky knuckle",
    "19. Right index knuckle",
    "20. Left index knuckle",
    "21. Right thumb knuckle",
    "22. Left thumb knuckle",
    "23. Right hip",
    "24. Left hip",
    "25. Right knee",
    "26. Left knee",
    "27. Right ankle",
    "28. Left ankle",
    "29. Right heel",
    "30. Left heel",
    "31. Right foot index",
    "32. Left foot index",
]


def transform_keypoints(keypoints, image_width, image_height) -> tlc.Keypoints2DInstances:
    """Convert one row's keypoint annotation into a single-instance 3LC object.

    Args:
        keypoints: Annotation string. Single quotes are replaced with double
            quotes and the result is parsed as JSON into a mapping that is
            looked up by the names in ``KPTS``.
        image_width: Image width forwarded to the 3LC container.
        image_height: Image height forwarded to the 3LC container.

    Returns:
        A ``tlc.Keypoints2DInstances`` containing one instance with label 0.
    """
    annotation = json.loads(keypoints.replace("'", '"'))

    # One row per entry in KPTS; rows whose key is absent from the annotation
    # are left as zeros.
    points = np.zeros((NUM_KEYPOINTS, 3), dtype=np.float32)
    for row_idx, name in enumerate(KPTS):
        if name in annotation:
            points[row_idx, :] = annotation[name]
            # The third column is overwritten with 2 for every annotated
            # keypoint (presumably the visibility flag -- confirm against 3LC).
            points[row_idx, 2] = 2

    result = tlc.Keypoints2DInstances.create_empty(
        image_width=image_width,
        image_height=image_height,
        include_keypoint_visibilities=True,
        include_instance_bbs=False,
    )
    result.add_instance(keypoints=points, label=0)
    return result


# Shared across all rows: transform_segments calls this with each segment's class
# name to obtain its label, and the table schema later enumerates the values it
# has collected to build the segmentation label map.
builder = _ValueMapBuilder[str]()


def transform_segments(segments, image_width, image_height):
    """Convert one row's segment annotation into a 3LC polygon dictionary.

    Args:
        segments: Annotation string. Single quotes are replaced with double
            quotes and the result is parsed as JSON into a sequence of entries,
            each with a ``"class_name"`` and a ``"polygon"`` made of
            ``{"x": ..., "y": ...}`` points.
        image_width: Image width forwarded to the 3LC container.
        image_height: Image height forwarded to the 3LC container.

    Returns:
        A ``tlc.SegmentationPolygonsDict`` with one flattened polygon and one
        label per segment.
    """
    parsed = json.loads(segments.replace("'", '"'))

    flat_polygons = []
    label_ids = []
    for entry in parsed:
        name = entry["class_name"]
        vertices = entry["polygon"]

        # Flatten the points into [x0, y0, x1, y1, ...].
        coords = []
        for vertex in vertices:
            coords.extend([vertex["x"], vertex["y"]])
        flat_polygons.append(coords)

        # The module-level builder turns the class name into its label value.
        label_ids.append(builder(name))

    return tlc.SegmentationPolygonsDict(
        image_width=image_width,
        image_height=image_height,
        polygons=flat_polygons,
        instance_properties={"label": label_ids},
    )


def transform_bboxes(face_bbox, person_bbox, image_width, image_height):
    """Build a 3LC bounding-box dictionary holding the face and person boxes.

    Args:
        face_bbox: JSON string whose first four elements fill x0, y0, x1, y1
            of the box with label 0.
        person_bbox: JSON string whose first four elements fill x0, y0, x1, y1
            of the box with label 1.
        image_width: Image width stored in the result.
        image_height: Image height stored in the result.

    Returns:
        A dict with the image size and a two-element bounding-box list
        (face first, then person).
    """
    # NOTE(review): the table schema gives x1/y1 size roles, so the boxes are
    # presumably [x, y, width, height] -- confirm against the dataset docs.
    parsed_boxes = (json.loads(face_bbox), json.loads(person_bbox))
    coord_keys = (tlc.X0, tlc.Y0, tlc.X1, tlc.Y1)

    box_list = []
    for label, coords in enumerate(parsed_boxes):
        box = {key: coords[pos] for pos, key in enumerate(coord_keys)}
        box[tlc.LABEL] = label
        box_list.append(box)

    return {
        tlc.IMAGE_WIDTH: image_width,
        tlc.IMAGE_HEIGHT: image_height,
        tlc.BOUNDING_BOX_LIST: box_list,
    }

In [None]:
import math


def transform_metadata(input_metadata):
    """Parse a metadata cell into a Python value where possible.

    Args:
        input_metadata: Raw cell value from the CSV row. Strings have single
            quotes replaced with double quotes and are then parsed as JSON.

    Returns:
        The parsed JSON value when parsing succeeds, the input unchanged when
        it is not a string or not valid JSON, and an empty list when the
        resulting value is a float NaN.
    """
    try:
        metadata = json.loads(input_metadata.replace("'", '"'))
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: the cell is not a string (e.g. a number or NaN).
        # ValueError (which includes json.JSONDecodeError): the text is not JSON.
        # Anything else, such as KeyboardInterrupt, is deliberately left to propagate.
        metadata = input_metadata

    # Missing cells arrive as float NaN; represent them as an empty list.
    if isinstance(metadata, float) and math.isnan(metadata):
        return []

    return metadata

## Load data

In [None]:
# These keys are treated separately
IGNORE_KEYS = {"filepath", "image_height", "image_width", "keypoints", "face_bbox", "person_bbox", "segments"}
# These keys have some nan-issues (easy to fix, but skip for now)
ERRORING_KEYS = {
    "model",
    "keypoints_QAannotator_id",
    "camera_QAannotator_id",
    "face_bbox_QAannotator_id",
    "segments_QAannotator_id",
    "camera_position_QAannotator_id",
    "manufacturer",
    "aperture_value",
    "shutter_speed_value",
    "face_bbox_annotator_id",
    "focal_length",
    "head_pose_annotator_id",
    "keypoints_annotator_id",
    "iso_speed_ratings",
    "segments_annotator_id",
    "location_region",
}
# Built once, outside the loop, instead of once per row.
SKIPPED_KEYS = IGNORE_KEYS | ERRORING_KEYS

# Column name -> list of per-row values, in the layout passed to the table writer.
data = defaultdict(list)

# head(MAX_SAMPLES) limits ingestion to at most MAX_SAMPLES rows. The earlier
# `index > MAX_SAMPLES` check ran after the row was processed and let
# MAX_SAMPLES + 2 rows through.
for index, row in csv_file.head(MAX_SAMPLES).iterrows():
    input_row = row.to_dict()
    image_path = FHIBE_ROOT / input_row["filepath"]

    # Extract and convert annotations to 3LC format

    image_height = input_row["image_height"]
    image_width = input_row["image_width"]
    keypoints = transform_keypoints(input_row["keypoints"], image_width, image_height)
    segments = transform_segments(input_row["segments"], image_width, image_height)
    bboxes = transform_bboxes(input_row["face_bbox"], input_row["person_bbox"], image_width, image_height)
    data["image"].append(image_path.as_posix())
    data["keypoints"].append(keypoints.to_row())
    data["bbs"].append(bboxes)
    data["segments"].append(segments)

    # Extract metadata

    # Walk the keys in CSV column order (rather than through a set difference)
    # so the metadata columns are created in the same order on every run.
    for key in input_row:
        if key not in SKIPPED_KEYS:
            data[key].append(transform_metadata(input_row[key]))

## Write 3LC Table

In [None]:
# Assemble the per-column schemas one at a time, then hand them to the writer.
column_schemas = {}

column_schemas["image"] = tlc.ImageUrlSchema()

# Keypoint attribute names are the KPTS entries without their "<index>. " prefix.
column_schemas["keypoints"] = tlc.Keypoints2DSchema(
    classes=["person"],
    num_keypoints=NUM_KEYPOINTS,
    point_attributes=[entry.split(". ")[1] for entry in KPTS],
    include_per_point_visibility=True,
)

# Label 0 is the face box and label 1 the person box, matching transform_bboxes.
column_schemas["bbs"] = tlc.BoundingBoxListSchema(
    label_value_map={0: tlc.MapElement("face"), 1: tlc.MapElement("person")},
    include_segmentation=False,
    x1_number_role=tlc.NUMBER_ROLE_BB_SIZE_X,
    y1_number_role=tlc.NUMBER_ROLE_BB_SIZE_Y,
)

# Segment labels come from the class names that `builder` collected while the
# rows were transformed; the text after ". " is used as the display name.
column_schemas["segments"] = tlc.SegmentationSchema(
    label_value_map={
        label_id: tlc.MapElement(class_name.split(". ")[1])
        for label_id, class_name in enumerate(builder._values)
    },
)

table_writer = tlc.TableWriter(
    table_name=TABLE_NAME,
    dataset_name=DATASET_NAME,
    project_name=PROJECT_NAME,
    column_schemas=column_schemas,
)
# Write all collected columns in one batch and finalize the table.
table_writer.add_batch(data)
table = table_writer.finalize()

In [None]:
# Show the finalized table as the cell output.
table