In [1]:
# # 01 ‚Äî Load and Inspect

# This notebook loads the CSV file using the project preprocessing module.
# It inspects:
# ‚Ä¢ metadata
# ‚Ä¢ frame indices
# ‚Ä¢ timestamps
# ‚Ä¢ position arrays
# quaternion arrays

In [2]:
import os
import numpy as np
import pandas as pd

# Make sure src/ is on the Python path
import sys
# Check if we're in notebooks/ dir or project root
if os.path.basename(os.getcwd()) == 'notebooks':
    PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
else:
    PROJECT_ROOT = os.path.abspath(os.getcwd())
SRC_PATH = os.path.join(PROJECT_ROOT, "src")
if SRC_PATH not in sys.path:
    sys.path.insert(0, SRC_PATH)

from pipeline_config import CONFIG
from preprocessing import parse_optitrack_csv

print("Paths configured.")

Paths configured.


In [3]:
# Path loaded from config.yaml
DATA_DIR = os.path.join(PROJECT_ROOT, CONFIG['data_dir'])
CSV_PATH = os.path.join(DATA_DIR, CONFIG['current_csv'])

print("CSV path:", CSV_PATH)

CSV path: c:\Users\drorh\OneDrive - Mobileye\Desktop\gaga\data\734\T3\734_T3_P2_R1_Take 2025-12-30 04.12.54 PM_002.csv


In [4]:
import json

SCHEMA_PATH = os.path.join(PROJECT_ROOT, "config", "skeleton_schema.json")

if os.path.exists(SCHEMA_PATH):
    with open(SCHEMA_PATH, "r") as f:
        schema = json.load(f)
    print("Schema loaded:", SCHEMA_PATH)
else:
    schema = None
    print("Schema file not found ‚Äî you must create or provide one.")

Schema loaded: c:\Users\drorh\OneDrive - Mobileye\Desktop\gaga\config\skeleton_schema.json


In [5]:
frame_idx, time_s, pos_mm, q_global, loader_report = parse_optitrack_csv(
    CSV_PATH,
    schema
)

print("CSV parsed successfully.")

CSV parsed successfully.


In [6]:
# # Run this in a new cell to inspect the raw file structure
# with open(CSV_PATH, 'r', encoding='utf-8', errors='ignore') as f:
#     print("--- FIRST 10 LINES OF CSV ---")
#     for i in range(10):
#         print(repr(f.readline())) # repr() shows us hidden characters like \t or \r

In [7]:
import pprint
pprint.pprint(loader_report)

{'calibration': {'export_date': None,
                 'optitrack_version': 'unknown',
                 'pointer_tip_rms_error_mm': None,
                 'wand_error_mm': None},
 'data_quality': {'nan_position_percent': '0.00%',
                  'nan_rotation_percent': '0.00%'},
 'duration_sec': 137.525,
 'file_path': 'c:\\Users\\drorh\\OneDrive - '
              'Mobileye\\Desktop\\gaga\\data\\734\\T3\\734_T3_P2_R1_Take '
              '2025-12-30 04.12.54 PM_002.csv',
 'fps_estimated': 120.00480019200238,
 'segments_expected': 51,
 'segments_found_count': 51,
 'segments_found_list': ['Hips',
                         'Spine',
                         'Spine1',
                         'Neck',
                         'Head',
                         'LeftShoulder',
                         'LeftArm',
                         'LeftForeArm',
                         'LeftHand',
                         'LeftHandThumb1',
                         'LeftHandThumb2',
                      

In [8]:
print("Frame count:", len(frame_idx))
print("Time array shape:", time_s.shape)
print("Positions array shape:", pos_mm.shape)   # (T, J, 3)
print("Quaternions array shape:", q_global.shape)  # (T, J, 4)

Frame count: 16504
Time array shape: (16504,)
Positions array shape: (16504, 51, 3)
Quaternions array shape: (16504, 51, 4)


In [9]:
print("First 10 timestamps:", time_s[:10])
print("Last timestamp:", time_s[-1])
print("Effective FPS estimate:", 1.0 / np.median(np.diff(time_s)))

First 10 timestamps: [0.       0.008333 0.016667 0.025    0.033333 0.041667 0.05     0.058333
 0.066667 0.075   ]
Last timestamp: 137.525
Effective FPS estimate: 120.00480019200238


In [10]:
pos_missing = np.isnan(pos_mm).sum()
quat_missing = np.isnan(q_global).sum()

print("Missing position values:", pos_missing)
print("Missing quaternion values:", quat_missing)

Missing position values: 0
Missing quaternion values: 0


In [11]:
print("First frame positions (mm):")
print(pos_mm[0])

print("\nFirst frame quaternions (xyzw):")
print(q_global[0])

First frame positions (mm):
[[ 107.922668  847.03656   126.937088]
 [ 106.91288   926.592712  124.017258]
 [ 103.863426 1173.503296  119.737335]
 [ 103.288902 1429.482056  120.38903 ]
 [ 103.610497 1566.462402  146.496048]
 [ 140.617676 1387.541504  119.639977]
 [ 249.296295 1402.324341   98.837128]
 [ 550.555725 1314.707764   91.313148]
 [ 779.091125 1242.041016  114.825096]
 [ 797.605835 1211.064697  148.796326]
 [ 824.159485 1208.221924  161.365753]
 [ 846.541443 1197.191895  165.08049 ]
 [ 866.909912 1197.486816  125.627594]
 [ 897.215637 1165.128296  122.96067 ]
 [ 906.302979 1144.868774  122.606613]
 [ 859.02124  1195.801758  103.017883]
 [ 894.494934 1162.746826   97.030296]
 [ 908.861938 1140.381592   95.151489]
 [ 846.976685 1196.74646    81.880081]
 [ 880.731628 1169.029785   73.817192]
 [ 895.241089 1152.429932   71.157051]
 [ 834.864746 1197.676636   60.549007]
 [ 862.156616 1176.9198     51.233253]
 [ 874.962769 1165.259399   47.276348]
 [  66.21756  1387.378784  117.93042

In [12]:
from skeleton_defs import SKELETON_HIERARCHY

def validate_hierarchy_and_build_map(found_joints_list, strict_mode=True):
    """
    Quality Gate:
    1. Checks if ALL joints defined in SKELETON_HIERARCHY exist in the data.
    2. Builds the final kinematics mapping dictionary.
    3. Throws a CRITICAL ERROR if strict_mode is True and any joint is missing.
    """
    
    print(f"\n{'='*20} HIERARCHY VALIDATION REPORT {'='*20}")
    print(f"Strict Mode: {strict_mode}\n")
    
    kinematics_map = {}
    missing_joints = []
    
    
    found_set = set(found_joints_list)
    
 
    for segment, info in SKELETON_HIERARCHY.items():
        parent = info['parent']
        angle_name = info['angle_name']
        
       
        if segment not in found_set:
            print(f"[FAIL] ‚ùå Segment Missing: {segment}")
            missing_joints.append(segment)
            continue
            
       
        if parent is not None and parent not in found_set:
            print(f"[FAIL] ‚ùå Parent Missing for {segment}: {parent}")
            missing_joints.append(f"{segment}_parent_{parent}")
            continue

        # 3. ◊ë◊†◊ô◊ô◊™ ◊î◊û◊ô◊§◊ï◊ô ◊ú◊î◊û◊©◊ö
        status = "Global" if parent is None else f"Rel: {parent}"
        print(f"[PASS] ‚úÖ {segment:<15} -> {status:<15} => Output: '{angle_name}'")
        
        kinematics_map[segment] = {
            "parent": parent,
            "angle_name": angle_name,
            "is_global": (parent is None)
        }

    print(f"\n{'='*60}")
    
 
    if len(missing_joints) > 0:
        error_msg = f"CRITICAL VALIDATION ERROR: Missing {len(missing_joints)} required joints: {missing_joints}"
        if strict_mode:
            raise ValueError(error_msg)
        else:
            print(f"WARNING: {error_msg}")
    else:
        print("SUCCESS: Full skeleton hierarchy validated successfully.")
        
    return kinematics_map


try:
    kinematics_targets = validate_hierarchy_and_build_map(
        loader_report['segments_found_list'], 
        strict_mode=True 
    )
    print("\nKinematics Map Created Ready for Processing.")
except ValueError as e:
    print(f"\n‚õî STOPPING EXECUTION: {e}")


Strict Mode: True

[PASS] ‚úÖ Hips            -> Global          => Output: 'Pelvis_Global_Orientation (World Space)'
[PASS] ‚úÖ Spine           -> Rel: Hips       => Output: 'Lumbar_Angle'
[PASS] ‚úÖ Spine1          -> Rel: Spine      => Output: 'Thoracic_Angle'
[PASS] ‚úÖ Neck            -> Rel: Spine1     => Output: 'Neck_Base_Angle'
[PASS] ‚úÖ Head            -> Rel: Neck       => Output: 'Head_Angle'
[PASS] ‚úÖ LeftUpLeg       -> Rel: Hips       => Output: 'LeftHip_Angle'
[PASS] ‚úÖ LeftLeg         -> Rel: LeftUpLeg  => Output: 'LeftKnee_Angle'
[PASS] ‚úÖ LeftFoot        -> Rel: LeftLeg    => Output: 'LeftAnkle_Angle'
[PASS] ‚úÖ LeftToeBase     -> Rel: LeftFoot   => Output: 'LeftToe_Angle'
[PASS] ‚úÖ RightUpLeg      -> Rel: Hips       => Output: 'RightHip_Angle'
[PASS] ‚úÖ RightLeg        -> Rel: RightUpLeg => Output: 'RightKnee_Angle'
[PASS] ‚úÖ RightFoot       -> Rel: RightLeg   => Output: 'RightAnkle_Angle'
[PASS] ‚úÖ RightToeBase    -> Rel: RightFoot  => Output: 'RightToe_Ang

In [13]:
# --- Save parsed data for Step 02 ---
import os

# 1. Create Output Directory
DERIV_01 = os.path.join(PROJECT_ROOT, CONFIG['derivatives_dir'], "step_01_parse")
os.makedirs(DERIV_01, exist_ok=True)

# 2. Convert raw arrays to DataFrame for saving
# We need to flatten the 3D arrays (Frames, Joints, 3/4) into 2D columns
flattened_data = {
    "frame_idx": frame_idx,
    "time_s": time_s
}

joint_names = schema['joint_names']

for i, name in enumerate(joint_names):
    # Save Position (X, Y, Z)
    flattened_data[f"{name}__px"] = pos_mm[:, i, 0]
    flattened_data[f"{name}__py"] = pos_mm[:, i, 1]
    flattened_data[f"{name}__pz"] = pos_mm[:, i, 2]
    
    # Save Rotation (X, Y, Z, W)
    flattened_data[f"{name}__qx"] = q_global[:, i, 0]
    flattened_data[f"{name}__qy"] = q_global[:, i, 1]
    flattened_data[f"{name}__qz"] = q_global[:, i, 2]
    flattened_data[f"{name}__qw"] = q_global[:, i, 3]

df_export = pd.DataFrame(flattened_data)

# 3. Save to Parquet
run_id = os.path.splitext(os.path.basename(CSV_PATH))[0]
out_path = os.path.join(DERIV_01, f"{run_id}__parsed_run.parquet")

df_export.to_parquet(out_path, index=False)

print(f"‚úÖ Data saved successfully to:\n{out_path}")

‚úÖ Data saved successfully to:
c:\Users\drorh\OneDrive - Mobileye\Desktop\gaga\derivatives\step_01_parse\734_T3_P2_R1_Take 2025-12-30 04.12.54 PM_002__parsed_run.parquet


In [14]:
import pandas as pd
import json
import os
import numpy as np

# --- 1. Build DataFrame ---
# ◊©◊ô◊û◊ï◊© ◊ë-joints_found ◊û◊™◊ï◊ö ◊î-loader_report
joints_found = loader_report['segments_found_list']

flattened_data = {
    'frame_idx': frame_idx,
    'time_s': time_s
}

for j_idx, joint_name in enumerate(joints_found):
    # Position (mm) - Mapping to __px, __py, __pz
    flattened_data[f'{joint_name}__px'] = pos_mm[:, j_idx, 0]
    flattened_data[f'{joint_name}__py'] = pos_mm[:, j_idx, 1]
    flattened_data[f'{joint_name}__pz'] = pos_mm[:, j_idx, 2]
    # Rotation (Quaternion) - Mapping to __qx, __qy, __qz, __qw
    flattened_data[f'{joint_name}__qx'] = q_global[:, j_idx, 0]
    flattened_data[f'{joint_name}__qy'] = q_global[:, j_idx, 1]
    flattened_data[f'{joint_name}__qz'] = q_global[:, j_idx, 2]
    flattened_data[f'{joint_name}__qw'] = q_global[:, j_idx, 3]

df_clean = pd.DataFrame(flattened_data)

# --- 2. Define RUN_ID and output directory ---
RUN_ID = os.path.splitext(os.path.basename(CSV_PATH))[0]
DERIV_01 = os.path.join(PROJECT_ROOT, CONFIG['derivatives_dir'], "step_01_parse")
os.makedirs(DERIV_01, exist_ok=True)

# --- 3. Create THE ULTIMATE ENHANCED REPORT ---
# Enhancement 1: Added calibration data extraction
calibration_data = loader_report.get('calibration', {})

enhanced_report = {
    "identity": {
        "run_id": RUN_ID,
        "processing_timestamp": pd.Timestamp.now().strftime('%Y-%m-%d %H:%M'),
        "pipeline_version": "v2.6_calibration_enhanced",
        "csv_source": CSV_PATH
    },
    "raw_data_quality": {
        "total_frames": len(df_clean),
        "missing_data_percent": loader_report['data_quality']['nan_position_percent'],
        "sampling_rate_actual": loader_report['fps_estimated'],
        "optitrack_mean_error_mm": float(metadata.get('mean_error', 0.0)) if 'metadata' in locals() else 0.0,
        "optitrack_version": calibration_data.get('optitrack_version', 'unknown')
    },
    "calibration": {
        "pointer_tip_rms_error_mm": calibration_data.get('pointer_tip_rms_error_mm'),
        "wand_error_mm": calibration_data.get('wand_error_mm'),
        "export_date": calibration_data.get('export_date')
    },
    "skeleton_info": {
        "segments_found": joints_found,
        "segments_found_count": len(joints_found),
        "segments_missing": loader_report['segments_missing_list'],
        "segments_missing_count": loader_report['segments_missing_count']
    },
    "duration_sec": loader_report['duration_sec']
}

# --- 4. Save report ---
report_path = os.path.join(DERIV_01, f"{RUN_ID}__step01_loader_report.json")
with open(report_path, 'w') as f:
    json.dump(enhanced_report, f, indent=4)

# --- 5. Summary Output ---
print(f"\n{'='*60}")
print(f"‚úÖ STEP 01 COMPLETE: {RUN_ID}")
print(f"üìä Frames: {len(df_clean):,}")
print(f"üé¨ Actual FPS: {loader_report['fps_estimated']:.2f}")
print(f"üìê Mean System Error: {enhanced_report['raw_data_quality']['optitrack_mean_error_mm']:.5f} mm")
print(f"ü¶¥ Segments Found: {len(joints_found)}/{loader_report.get('segments_expected', 'N/A')}")
print(f"üìÇ Audit Report Saved: {report_path}")
print(f"{'='*60}\n")

# Display first few rows
df_clean.head()

# At the end of cells with large arrays, add:
import gc
gc.collect()

# Don't keep multiple copies of large data
# Instead of: df_backup = df_clean
# Use: df_clean.to_parquet('temp_backup.parquet')


‚úÖ STEP 01 COMPLETE: 734_T3_P2_R1_Take 2025-12-30 04.12.54 PM_002
üìä Frames: 16,504
üé¨ Actual FPS: 120.00
üìê Mean System Error: 0.00000 mm
ü¶¥ Segments Found: 51/51
üìÇ Audit Report Saved: c:\Users\drorh\OneDrive - Mobileye\Desktop\gaga\derivatives\step_01_parse\734_T3_P2_R1_Take 2025-12-30 04.12.54 PM_002__step01_loader_report.json



63