In [None]:
"""
Outputs:
- group4_Lab1.xlsx with sheets:
  motion1_head, motion1_neck,
  motion2_rightshoulder, motion2_rightarm, motion2_rightforearm, motion2_righthand,
  offsets, meta
"""

from __future__ import annotations

from pathlib import Path
import numpy as np
import pandas as pd


# ---------------------------
# BVH parsing
# ---------------------------
def parse_bvh(bvh_path: str | Path):
    """
    Parse a BVH file into its hierarchy description and motion matrix.

    Args:
      bvh_path: path of the BVH file to read.

    Returns:
      joints: list[str]          ROOT/JOINT names in file order (End Sites are not listed)
      offsets: np.ndarray        shape (num_offsets, 3) - one row per OFFSET line found inside
                                 a block, in file order; this INCLUDES End Site offsets
      labels: list[str]          channel labels in column order, like 'Head_Xrotation'
      motion: np.ndarray         shape (frames, channels)
      frame_time: float          seconds

    Raises:
      ValueError: if the MOTION section, 'Frames:' or 'Frame Time:' is missing,
        an OFFSET/CHANNELS line is malformed, CHANNELS appears outside a joint
        block, the motion data is not numeric, or the amount of motion data
        does not match frames * channels.
    """
    bvh_path = Path(bvh_path)

    with bvh_path.open("r", encoding="utf-8", errors="ignore") as f:
        lines = f.read().splitlines()

    # Split into hierarchy and motion sections
    try:
        motion_idx = next(i for i, ln in enumerate(lines) if ln.strip() == "MOTION")
    except StopIteration:
        # `from None`: the StopIteration is an implementation detail, not a cause.
        raise ValueError(f"No MOTION section found in {bvh_path}") from None

    hier_lines = lines[:motion_idx]
    mot_lines = lines[motion_idx + 1 :]

    # Parse Frames and Frame Time
    frames_line = next((ln for ln in mot_lines if ln.strip().startswith("Frames:")), None)
    ft_line = next((ln for ln in mot_lines if ln.strip().startswith("Frame Time:")), None)
    if frames_line is None or ft_line is None:
        raise ValueError(f"Missing 'Frames:' or 'Frame Time:' in {bvh_path}")

    n_frames = int(frames_line.split(":", 1)[1].strip())
    frame_time = float(ft_line.split(":", 1)[1].strip())

    # Frame rows follow the 'Frame Time:' line. Blank lines are skipped so that
    # stray empty lines are not counted as frames.
    ft_idx = mot_lines.index(ft_line)
    data_lines = [ln for ln in mot_lines[ft_idx + 1 :] if ln.strip()][:n_frames]
    if len(data_lines) != n_frames:
        raise ValueError(
            f"Expected {n_frames} frame rows, got {len(data_lines)} in {bvh_path}"
        )

    # Parse hierarchy: joint names, offsets, and channel labels.
    # block_stack mirrors the brace nesting: a joint name for ROOT/JOINT blocks,
    # None for End Site blocks. End Sites must be pushed too, otherwise the '}'
    # that closes an End Site would pop its parent joint.
    block_stack: list[str | None] = []
    joints: list[str] = []
    offsets: list[list[float]] = []
    labels: list[str] = []

    for raw in hier_lines:
        line = raw.strip()

        if line.startswith(("ROOT", "JOINT")):
            # joint names can contain spaces (rare, but safe)
            jname = " ".join(line.split()[1:])
            block_stack.append(jname)
            joints.append(jname)

        elif line.split()[:2] == ["End", "Site"]:
            block_stack.append(None)

        elif line.startswith("OFFSET"):
            # OFFSET lines exist for ROOT/JOINT blocks and also for End Site
            # blocks; both kinds are recorded, in file order.
            parts = line.split()
            if len(parts) < 4:
                raise ValueError(f"Bad OFFSET line in {bvh_path}: {raw}")
            if block_stack:
                offsets.append([float(parts[1]), float(parts[2]), float(parts[3])])

        elif line.startswith("CHANNELS"):
            parts = line.split()
            if len(parts) < 3:
                raise ValueError(f"Bad CHANNELS line in {bvh_path}: {raw}")
            k = int(parts[1])
            chans = parts[2 : 2 + k]
            if len(chans) != k:
                # Fewer names than declared would silently shift every later column.
                raise ValueError(f"Bad CHANNELS line in {bvh_path}: {raw}")
            if not block_stack or block_stack[-1] is None:
                raise ValueError(f"CHANNELS outside joint block in {bvh_path}: {raw}")
            jname = block_stack[-1]
            labels.extend([f"{jname}_{c}" for c in chans])

        elif line == "}":
            if block_stack:
                block_stack.pop()

    # reshape keeps the (n, 3) contract even when no OFFSET line was found.
    offsets_arr = np.asarray(offsets, dtype=float).reshape(-1, 3)

    # Parse motion numeric matrix. Tokens are converted explicitly so that a
    # non-numeric token raises instead of silently truncating the data.
    num_channels = len(labels)
    tokens = " ".join(data_lines).split()
    try:
        flat = np.array(tokens, dtype=float)
    except ValueError as err:
        raise ValueError(f"Non-numeric motion data in {bvh_path}: {err}") from err
    if flat.size != n_frames * num_channels:
        raise ValueError(
            f"Data size mismatch in {bvh_path}: "
            f"got {flat.size} floats, expected {n_frames * num_channels} "
            f"({n_frames} frames * {num_channels} channels)."
        )
    motion = flat.reshape((n_frames, num_channels))

    return joints, offsets_arr, labels, motion, frame_time


def slice_joint(motion: np.ndarray, labels: list[str], joint_name: str):
    """Return (cols, data, colnames) for all channels belonging to joint_name.

    Labels have the form '<joint>_<channel>'. The joint part is everything
    before the LAST underscore, so joint names that themselves contain
    underscores are matched exactly: asking for 'Arm' does not pick up the
    channels of a joint called 'Arm_Roll'.

    Args:
      motion: 2-D array indexed as motion[:, column].
      labels: one label per column of motion, in column order.
      joint_name: exact joint name to extract.

    Returns:
      cols: list[int]      column indices of the joint's channels
      data: np.ndarray     motion[:, cols]
      colnames: list[str]  the matching labels, in column order

    Raises:
      ValueError: if no label belongs to joint_name; the message lists up to
        30 of the available joint names.
    """
    # Owning joint of every column (label minus its trailing '_<channel>').
    owners = [lab.rsplit("_", 1)[0] for lab in labels]
    cols = [i for i, owner in enumerate(owners) if owner == joint_name]
    if not cols:
        # Give a helpful error listing the joints that do exist
        unique_joints = sorted(set(owners))
        raise ValueError(
            f"No channels found for joint '{joint_name}'. "
            f"Available joints include: {', '.join(unique_joints[:30])}"
            + (" ..." if len(unique_joints) > 30 else "")
        )
    colnames = [labels[i] for i in cols]
    data = motion[:, cols]
    return cols, data, colnames


# ---------------------------
# Main: generate Excel
# ---------------------------
def main(
    motion1_bvh: str | Path = "motion_1_all.bvh",
    motion2_bvh: str | Path = "motion_2_all.bvh",
    out_xlsx: str | Path = "group4_Lab1.xlsx",
):
    """Parse two BVH files and write the selected joint data to an Excel workbook.

    The workbook gets one sheet per extracted joint (Head and Neck from
    motion 1; RightShoulder, RightArm, RightForeArm and RightHand from
    motion 2), an 'offsets' sheet holding the offsets parsed from motion 1,
    and a 'meta' sheet with frame counts, frame times, channel counts and the
    offsets shape. A short summary is printed afterwards.

    Args:
      motion1_bvh: path of the first BVH file.
      motion2_bvh: path of the second BVH file.
      out_xlsx: path of the workbook to create (written with the openpyxl engine).

    Calling main() with no arguments uses the default file names, relative to
    the current working directory.
    """
    out_xlsx = Path(out_xlsx)

    # Parse BVHs (joint-name lists and motion 2's offsets are not needed here)
    _, offsets1, labels1, motion1_all, dt1 = parse_bvh(motion1_bvh)
    _, _, labels2, motion2_all, dt2 = parse_bvh(motion2_bvh)

    # Required joint extracts (sheet_name, motion, labels, joint_name)
    extracts = [
        ("motion1_head", motion1_all, labels1, "Head"),
        ("motion1_neck", motion1_all, labels1, "Neck"),
        ("motion2_rightshoulder", motion2_all, labels2, "RightShoulder"),
        ("motion2_rightarm", motion2_all, labels2, "RightArm"),
        ("motion2_rightforearm", motion2_all, labels2, "RightForeArm"),
        ("motion2_righthand", motion2_all, labels2, "RightHand"),
    ]

    # (item, value) rows for the meta sheet
    meta_rows = [
        ("motion1_frames", motion1_all.shape[0]),
        ("motion1_dt", dt1),
        ("motion1_channels", motion1_all.shape[1]),
        ("motion2_frames", motion2_all.shape[0]),
        ("motion2_dt", dt2),
        ("motion2_channels", motion2_all.shape[1]),
        ("offsets_shape", f"{offsets1.shape}"),
    ]

    # Write workbook
    with pd.ExcelWriter(out_xlsx, engine="openpyxl") as writer:
        for sheet, mot, labs, joint in extracts:
            _, data, colnames = slice_joint(mot, labs, joint)
            pd.DataFrame(data, columns=colnames).to_excel(writer, sheet_name=sheet, index=False)

        # Offsets (lab wants 72x3 matrix called offsets; writing to a sheet is convenient)
        pd.DataFrame(offsets1, columns=["x", "y", "z"]).to_excel(writer, sheet_name="offsets", index=False)

        # Meta (optional, but useful)
        meta = pd.DataFrame(
            {
                "item": [item for item, _ in meta_rows],
                "value": [value for _, value in meta_rows],
            }
        )
        meta.to_excel(writer, sheet_name="meta", index=False)

    # Output summary
    print(f"Wrote: {out_xlsx.resolve()}")
    print(f"motion1_all shape: {motion1_all.shape}, dt={dt1}")
    print(f"motion2_all shape: {motion2_all.shape}, dt={dt2}")
    print(f"offsets shape: {offsets1.shape}")
    print("Sheets:", [s[0] for s in extracts] + ["offsets", "meta"])


# Script entry point: run the export only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Wrote: C:\Users\conno\Documents\DEVELOPMENT\BME551\lab_1\groupY_Lab1.xlsx
motion1_all shape: (1458, 354), dt=0.008
motion2_all shape: (1212, 354), dt=0.008
offsets shape: (72, 3)
Sheets: ['motion1_head', 'motion1_neck', 'motion2_rightshoulder', 'motion2_rightarm', 'motion2_rightforearm', 'motion2_righthand', 'offsets', 'meta']
