In [None]:
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import sys

from asl_data_pipeline.models.s3 import InputType
from asl_data_pipeline.preprocessing.hand_landmark_extractor.extractor import (
    HandLandmarkExtractor,
    HandLandmarkExtractorConfig,
)
from asl_data_pipeline.preprocessing.hand_sa_converter.converter import HandSAConverter
from asl_data_pipeline.preprocessing.hand_sa_converter.models import HandSAConverterConfig
from asl_data_pipeline.preprocessing.pose_extractor.extractor import PoseExtractor, PoseExtractorConfig
from asl_data_pipeline.preprocessing.key_frame_extractor.extractor import KeyFrameExtractor, KeyFrameExtractorConfig
from asl_data_pipeline.preprocessing.key_frame_extractor.models import S3Config
from asl_data_pipeline.preprocessing.pose_sa_converter.converter import PoseSAConverter
from asl_data_pipeline.preprocessing.pose_sa_converter.models import PoseSAConverterConfig

In [None]:
# Route log records from the notebook and the imported modules to stdout
# at INFO level, tagging each record with timestamp, level and logger name.
LOG_FORMAT = "[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s"
logging.basicConfig(stream=sys.stdout, format=LOG_FORMAT, level=logging.INFO)

Extract representative frames from input videos stored on S3 and create a JSONL manifest listing all generated frames. This manifest is the input for downstream landmark extraction steps.

# Step 1: Frame Extraction

In [None]:
# Key-frame extraction: pull representative frames out of the source videos on S3.
#
# Reads:
#   - the videos found under the S3 prefix given as input_uri (InputType.FOLDER)
# Writes:
#   - the extracted frames under output_uri, preserving structure
#   - manifest.jsonl, a JSONL manifest listing frame S3 paths and metadata
#
# See the KeyFrameExtractorConfig data model for the options available to tune this step.
key_frame_s3 = S3Config(
    input_type=InputType.FOLDER,
    aws_profile="aws-profile-name",
    input_uri="s3://bucket-name/input-folder/",
    output_uri="s3://bucket-name/output-folder/video_keyframes/",
)
key_frame_extractor_config = KeyFrameExtractorConfig(
    s3=key_frame_s3,
    force_write=True,
    max_videos=4,
)
frame_extractor = KeyFrameExtractor(key_frame_extractor_config)
frame_extractor.process_videos_batch()


Detect 21-keypoint hand landmarks on the frames listed in the key-frames manifest, then write per-frame JSON outputs and a new JSONL manifest for use by later steps.

# Step 2: Hand Landmark Extraction

In [None]:
# Hand landmark extraction: detect hand landmarks on every key frame and store the results on S3.
#
# Reads:
#   - the JSONL manifest of key frames given as input_uri (InputType.MANIFEST)
# Writes:
#   - one JSON file per frame holding the 21-keypoint hand landmarks
#   - a new manifest.jsonl under output_uri indexing the produced annotations
#
# See the HandLandmarkExtractorConfig data model for the options available to tune this step.
hand_landmark_s3 = S3Config(
    input_type=InputType.MANIFEST,
    aws_profile="aws-profile-name",
    input_uri="s3://bucket-name/output-folder/video_keyframes/manifest.jsonl",
    output_uri="s3://bucket-name/output-folder/video_hand_landmarks_extracted/",
)
hand_landmark_extractor_config = HandLandmarkExtractorConfig(
    s3=hand_landmark_s3,
    force_write=True,
    max_workers=4,
    out_of_bound_removal=True,
)
hand_landmark_extractor = HandLandmarkExtractor(hand_landmark_extractor_config)
hand_landmark_extractor.process_batch()

Detect 33-keypoint whole-body pose landmarks for each key frame and emit per-frame JSON plus a manifest that indexes the generated pose annotations.

# Step 3: Pose Extraction

In [None]:
# Pose extraction: detect whole-body pose landmarks on every key frame and store the results on S3.
#
# Reads:
#   - the JSONL manifest produced by the key-frame step, given as input_uri (InputType.MANIFEST)
# Writes:
#   - one JSON file per frame holding the 33-keypoint pose landmarks
#   - a new manifest.jsonl under output_uri listing the pose annotation files
#
# See the PoseExtractorConfig data model for the options available to tune this step.
pose_s3 = S3Config(
    input_type=InputType.MANIFEST,
    aws_profile="aws-profile-name",
    input_uri="s3://bucket-name/output-folder/video_keyframes/manifest.jsonl",
    output_uri="s3://bucket-name/output-folder/video_pose_landmarks_extracted/",
)
pose_extractor_config = PoseExtractorConfig(
    s3=pose_s3,
    force_write=True,
    max_workers=4,
    out_of_bound_removal=True,
)
pose_extractor = PoseExtractor(pose_extractor_config)
pose_extractor.process_batch()

Convert the extracted hand landmark JSON into SuperAnnotate (SA) keypoint annotations compatible with SA tools, producing per-image SA JSON alongside a manifest for import.

# Step 4: Hand SuperAnnotate Format Conversion

In [None]:
# Hand SA conversion: rewrite the hand landmark JSON in SuperAnnotate (SA) format for labeling tools.
#
# Reads:
#   - the manifest produced by the hand landmark extractor, given as input_uri (InputType.MANIFEST)
# Writes:
#   - one SA JSON file per image under output_uri
#   - a new manifest.jsonl indexing the generated SA annotations
#
# See the HandSAConverterConfig data model for the options available to tune this step.
hand_sa_s3 = S3Config(
    input_type=InputType.MANIFEST,
    aws_profile="aws-profile-name",
    input_uri="s3://bucket-name/output-folder/video_hand_landmarks_extracted/manifest.jsonl",  # noqa: E501
    output_uri="s3://bucket-name/output-folder/video_hand_landmarks_sa/",
)
hand_sa_converter_config = HandSAConverterConfig(
    s3=hand_sa_s3,
    max_workers=4,
    force_write=True,
)
hand_sa_converter = HandSAConverter(hand_sa_converter_config)
hand_sa_converter.process_batch()


Convert pose landmark JSON into SuperAnnotate (SA) format so it can be reviewed or refined in SA, producing per-image SA JSON and a manifest suitable for import.

# Step 5: Pose SuperAnnotate Format Conversion

In [None]:
# Pose SA conversion: rewrite the pose landmark JSON in SuperAnnotate (SA) format for SA tooling.
#
# Reads:
#   - the manifest from the pose landmark extractor, given as input_uri (InputType.MANIFEST)
# Writes:
#   - one SA JSON file per image for pose under output_uri
#   - a new manifest.jsonl indexing the pose SA annotations
#
# See the PoseSAConverterConfig data model for the options available to tune this step.
pose_sa_s3 = S3Config(
    input_type=InputType.MANIFEST,
    aws_profile="aws-profile-name",
    input_uri="s3://bucket-name/output-folder/video_pose_landmarks_extracted/manifest.jsonl",  # noqa: E501
    output_uri="s3://bucket-name/output-folder/video_pose_landmarks_sa/",
)
pose_sa_converter_config = PoseSAConverterConfig(
    s3=pose_sa_s3,
    max_workers=4,
    force_write=True,
)
pose_sa_converter = PoseSAConverter(pose_sa_converter_config)
pose_sa_converter.process_batch()

# Step 6: Data Postprocessing (Optional)

In [None]:
# After annotating the data in SuperAnnotate and exporting it, run the postprocessing
# script to redact emails and remove metadata before sharing.
#
# Compatible SuperAnnotate export formats:
#   - JSON (single annotation)
#   - JSONL (one annotation per line)
# Both formats are fully supported by the postprocessing script.
#
# Usage:
#   python scripts/asl_data_postprocessing.py <sa_export>.json -o <output>.json
#   python scripts/asl_data_postprocessing.py <sa_export>.jsonl -o <output>.jsonl
#
# For detailed usage, see scripts/asl_data_postprocessing.py or run:
#   python scripts/asl_data_postprocessing.py --help