In [None]:
import csv
import os
from pathlib import Path
import subprocess
import requests
import init_arkit
import cv2
import json

# Show which init_arkit source file the interpreter actually imported.
print("INIT FILE:", init_arkit.__file__)
import importlib
# Re-execute the already-imported init_arkit module so that edits made to it
# after the first import take effect without restarting the interpreter.
importlib.reload(init_arkit)

# -------------------------------------------------------------
# Utils
# -------------------------------------------------------------
def download_file(file_url: str, dest_path: str | os.PathLike) -> bool:
    """Download *file_url* to *dest_path* and report whether it succeeded.

    The response body is streamed to a sibling ``<name>.part`` file in 1 MiB
    chunks and only renamed onto *dest_path* once the transfer has completed.
    This keeps memory use bounded for large files and guarantees that a
    failed or interrupted download never leaves a truncated file at
    *dest_path* (callers test for the file's existence afterwards).

    Args:
        file_url: URL to fetch with an HTTP GET (60 second timeout).
        dest_path: Target file path; missing parent directories are created.

    Returns:
        True if the file was written completely, False on any error. Errors
        are printed, not raised.
    """
    dest_path = Path(dest_path)
    partial_path = dest_path.with_name(dest_path.name + ".part")
    try:
        print(f"Downloading {file_url} â†’ {dest_path}")
        # stream=True defers the body download so it can be written chunk by
        # chunk; the context manager releases the connection afterwards.
        with requests.get(file_url, stream=True, timeout=60) as response:
            response.raise_for_status()
            dest_path.parent.mkdir(parents=True, exist_ok=True)
            with partial_path.open("wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
        # Publish the file only after every byte has been written.
        partial_path.replace(dest_path)
        print(f"âœ“ Downloaded: {dest_path}")
        return True
    except Exception as e:
        # Best-effort cleanup of the incomplete temporary file.
        try:
            partial_path.unlink()
        except OSError:
            pass
        print(f"âœ— Failed to download {file_url}: {e}")
        return False


# -------------------------------------------------------------
# Download ARKit labelmaker assets
# -------------------------------------------------------------
def download_arkit_labelmaker(video_id: str, split: str, scene_dir: Path) -> None:
    """Fetch the labelmaker files for one scene into *scene_dir*.

    Two files are requested, in this order: ``labels.txt`` (from the
    dataset's ``raw/main`` endpoint) and ``point_lifted_mesh.ply`` (from its
    ``resolve/main`` endpoint). Each is passed to ``download_file``; the
    success flag it returns is not inspected here.

    Args:
        video_id: Scene identifier used in the remote path.
        split: Split name used in the remote path.
        scene_dir: Local directory the files are saved into.
    """
    rule = "==============================================="
    print("\n" + rule)
    print(f"ðŸ“Œ Downloading ARKit labelmaker: {video_id} (split: {split})")
    print(rule)

    raw_root = "https://huggingface.co/datasets/labelmaker/arkit_labelmaker/raw/main"
    resolve_root = "https://huggingface.co/datasets/labelmaker/arkit_labelmaker/resolve/main"

    for asset in ("labels.txt", "point_lifted_mesh.ply"):
        # .ply files go through the "resolve" endpoint, everything else "raw".
        root = resolve_root if asset.endswith(".ply") else raw_root
        download_file(f"{root}/{split}/{video_id}/{asset}", scene_dir / asset)

    print(f"ðŸŽ‰ Finished downloading scene's labelmaker: {video_id}")


# -------------------------------------------------------------
# Download ARKit scene
# -------------------------------------------------------------
def download_arkit_scene(video_id: str, split: str, download_dir: str = "arkitscenes") -> None:
    """Run ``download_data.py`` to fetch the raw assets of one scene.

    The script is started with ``python3`` from the current working directory
    and asked for the ``lowres_wide.traj``, ``vga_wide`` and
    ``vga_wide_intrinsics`` raw assets.

    A failure does not raise: if the process cannot be launched or exits with
    a non-zero status, a ``[WARN]`` line is printed instead of the success
    message, so the log no longer claims a failed download finished.

    Args:
        video_id: Scene identifier passed as ``--video_id``.
        split: Split name passed as ``--split``.
        download_dir: Directory passed as ``--download_dir``.
    """
    print("\n===============================================")
    print(f"ðŸ“Œ Downloading ARKit Scene: {video_id} (split: {split})")
    print("===============================================")

    cmd = [
        "python3", "download_data.py", "raw",
        "--video_id", video_id,
        "--split", split,
        "--download_dir", download_dir,
        "--raw_dataset_assets", "lowres_wide.traj",
        "vga_wide", "vga_wide_intrinsics",
    ]

    try:
        # check=False: the exit status is inspected below rather than raised.
        completed = subprocess.run(cmd, check=False)
    except OSError as exc:
        # Raised when the interpreter itself cannot be started.
        print(f"[WARN] Could not launch download_data.py for scene {video_id}: {exc}")
        return

    if completed.returncode != 0:
        print(
            f"[WARN] download_data.py exited with code {completed.returncode} "
            f"for scene {video_id}"
        )
        return

    print(f"ðŸŽ‰ Finished downloading scene: {video_id}")


# -------------------------------------------------------------
# Main dataset processing
# -------------------------------------------------------------
def download_arkit_dataset(
    csv_path: str = "raw_train_val_splits.csv",
    download_dir: str = "arkitscenes",
    output_fol: str = "segmentation_summary",
    frame_stride: int = 30,
) -> None:
    """Download and process every scene listed in a split CSV.

    For each CSV row that provides a scene id (column ``video_id``, ``id`` or
    ``scene_id``) and a split (column ``split``, ``fold`` or ``scene_type``),
    an output directory ``<output_fol>/<video_id>`` is created and the scene
    is downloaded and processed. Rows missing either value are skipped.

    Args:
        csv_path: Path of the CSV file listing the scenes.
        download_dir: Root directory the raw scene data is downloaded into.
        output_fol: Root directory for per-scene overlays and statistics.
        frame_stride: Only every ``frame_stride``-th PNG frame (in sorted
            filename order, starting with the first) is processed.

    Raises:
        ValueError: If ``frame_stride`` is smaller than 1.
    """
    if frame_stride < 1:
        raise ValueError("frame_stride must be >= 1")

    csv_path = Path(csv_path)
    if not csv_path.exists():
        print(f"CSV not found: {csv_path}")
        return

    # newline="" is what the csv module asks for when it is given a file object.
    with csv_path.open("r", newline="") as f:
        for row in csv.DictReader(f):
            video_id = row.get("video_id") or row.get("id") or row.get("scene_id")
            split = row.get("split") or row.get("fold") or row.get("scene_type")

            if not video_id or not split:
                continue

            output_dir = Path(output_fol) / video_id
            output_dir.mkdir(parents=True, exist_ok=True)

            _process_scene(video_id, split, download_dir, output_dir, frame_stride)


def _process_scene(
    video_id: str,
    split: str,
    download_dir: str,
    output_dir: Path,
    frame_stride: int,
) -> None:
    """Download one scene, write its overlays and stats, then run Molmo."""
    download_arkit_scene(video_id, split, download_dir)

    scene_dir = Path(download_dir) / "raw" / split / video_id
    intrinsics_dir = scene_dir / "vga_wide_intrinsics"
    image_dir = scene_dir / "vga_wide"
    traj_path = scene_dir / "lowres_wide.traj"

    download_arkit_labelmaker(video_id, split, scene_dir)

    mesh_path = scene_dir / "point_lifted_mesh.ply"
    labels_path = scene_dir / "labels.txt"

    # The image directory is checked too: listing a missing directory below
    # would otherwise raise and abort the remaining scenes.
    if not (
        mesh_path.exists()
        and labels_path.exists()
        and traj_path.exists()
        and image_dir.is_dir()
    ):
        print(f"Missing required files for {video_id}, skipping.")
        return

    # -------------------------------------------------
    # Load poses and estimate GLOBAL roll ONCE
    # -------------------------------------------------
    poses = list(init_arkit.read_traj(traj_path).items())
    global_snap_roll = init_arkit.estimate_global_snap_roll(poses)

    print(f"[INFO] Global snapped roll for {video_id}: {global_snap_roll}")

    # Computed lazily from the first readable frame and then reused for every
    # later frame of this scene; both stay None when the roll is falsy.
    roll_affine = None
    output_size = None

    # -------------------------------------------------
    # Enumerate frames
    # -------------------------------------------------
    png_files = sorted(f for f in os.listdir(image_dir) if f.endswith(".png"))

    class_pixel_stats = {"door": {}, "switch": {}}

    for idx, filename in enumerate(png_files):
        if idx % frame_stride != 0:
            continue

        extracted_ts = init_arkit.extract_timestamp_from_filename(filename)
        if extracted_ts is None:
            continue

        _, (rotvec, transvec) = init_arkit.get_pose_for_nearest_timestamp(
            extracted_ts, poses
        )

        # The image and its intrinsics file share the "<video_id>_<timestamp>" stem.
        frame_name = f"{video_id}_{extracted_ts}"
        frame_path = image_dir / f"{frame_name}.png"
        pincam_path = intrinsics_dir / f"{frame_name}.pincam"

        if not frame_path.exists() or not pincam_path.exists():
            continue

        rgb_img = cv2.imread(str(frame_path))
        if rgb_img is None:
            continue

        # -------------------------------------------------
        # APPLY GLOBAL ORIENTATION (IMAGE)
        # -------------------------------------------------
        if global_snap_roll and roll_affine is None:
            A, nW, nH = init_arkit.compute_roll_affine(
                rgb_img.shape[0], rgb_img.shape[1], global_snap_roll
            )
            roll_affine = A
            output_size = (nW, nH)

        if roll_affine is not None:
            rgb_img = cv2.warpAffine(
                rgb_img,
                roll_affine,
                output_size,
                flags=cv2.INTER_LINEAR,
                borderMode=cv2.BORDER_CONSTANT,
                borderValue=(0, 0, 0),
            )

        # -------------------------------------------------
        # PROJECT (NO ORIENTATION LOGIC INSIDE)
        # -------------------------------------------------
        projection, contains_target, label_counts, total_pixels, door_instances_2d = (
            init_arkit.project_instance(
                mesh_path=mesh_path,
                labels_path=labels_path,
                pincam_path=str(pincam_path),
                rotation_vec=rotvec,
                translation_vec=transvec,
                rgb_frame=rgb_img,
                alpha=0.6,
                roll_affine=roll_affine,
                output_size=output_size,
            )
        )

        _record_frame_stats(
            class_pixel_stats,
            contains_target,
            door_instances_2d,
            label_counts,
            total_pixels,
            extracted_ts,
        )

        cv2.imwrite(str(output_dir / f"overlay_{extracted_ts}.png"), projection)

    # -------------------------------------------------
    # Save stats
    # -------------------------------------------------
    json_path = output_dir / f"{video_id}.json"
    with json_path.open("w") as f:
        json.dump(class_pixel_stats, f, indent=4)

    # Looked up defensively: a missing run_molmo would otherwise raise
    # AttributeError here and stop the remaining scenes from being processed.
    run_molmo = getattr(init_arkit, "run_molmo", None)
    if run_molmo is None:
        print(
            f"[WARN] init_arkit.run_molmo is not available; "
            f"skipping Molmo step for {video_id}."
        )
        return

    run_molmo(
        video_id=video_id,
        json_path=json_path,
        image_path=image_dir,
        output_dir=output_dir,
        poses=poses,
    )


def _record_frame_stats(
    class_pixel_stats: dict,
    contains_target,
    door_instances_2d,
    label_counts,
    total_pixels,
    timestamp,
) -> None:
    """Append one frame's per-label pixel counts to every door instance seen."""
    # Guard against a zero pixel total when computing percentages.
    denominator = max(total_pixels, 1)

    # Only the first entry of contains_target is consumed, paired with "door".
    for obj_type, present in zip(["door"], contains_target):
        if not present:
            continue

        for inst_id in door_instances_2d:
            per_instance = class_pixel_stats[obj_type].setdefault(str(inst_id), {})
            frame_entry = per_instance.setdefault(timestamp, {"labels": []})

            for label_id, count in label_counts.items():
                frame_entry["labels"].append(
                    {
                        "label_id": int(label_id),
                        "pixel_count": int(count),
                        "pixel_percentage": (count / denominator) * 100.0,
                    }
                )


# -------------------------------------------------------------
# Entry point
# -------------------------------------------------------------
if __name__ == "__main__":
    import argparse

    # Command-line options and their defaults.
    cli = argparse.ArgumentParser()
    for flag, fallback in (
        ("--csv_path", "raw_train_val_splits.csv"),
        ("--download_dir", "arkitscenes"),
        ("--output_dir", "segmentation_summary"),
    ):
        cli.add_argument(flag, default=fallback)

    # parse_known_args returns unrecognised argv entries instead of exiting
    # with an error; they are ignored here.
    options, _ignored = cli.parse_known_args()

    download_arkit_dataset(options.csv_path, options.download_dir, options.output_dir)


INIT FILE: /home/devika/Desktop/MasterProject/Code/processarkit/init_arkit.py
Reading split file: raw_train_val_splits.csv
Loaded row: {'video_id': '41048068', 'visit_id': 'NA', 'fold': 'Training'}

📌 Downloading ARKit Scene: 41048068  (split: Training)
Downloading file /home/devika/Desktop/MasterProject/Code/processarkit/arkitscenes/raw/Training/41048068/lowres_wide.traj


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  223k  100  223k    0     0   199k      0  0:00:01  0:00:01 --:--:--  200k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0

Downloading file /home/devika/Desktop/MasterProject/Code/processarkit/arkitscenes/raw/Training/41048068/vga_wide.zip


100 2509M  100 2509M    0     0  29.1M      0  0:01:25  0:01:25 --:--:-- 30.0M


Unzipping zip file /home/devika/Desktop/MasterProject/Code/processarkit/arkitscenes/raw/Training/41048068/vga_wide.zip
Downloading file /home/devika/Desktop/MasterProject/Code/processarkit/arkitscenes/raw/Training/41048068/vga_wide_intrinsics.zip


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1325k  100 1325k    0     0  1198k      0  0:00:01  0:00:01 --:--:-- 1198k


Unzipping zip file /home/devika/Desktop/MasterProject/Code/processarkit/arkitscenes/raw/Training/41048068/vga_wide_intrinsics.zip
🎉 Finished downloading scene: 41048068

📌 Downloading ARKit labelmaker: 41048068  (split: Training)
Downloading https://huggingface.co/datasets/labelmaker/arkit_labelmaker/raw/main/Training/41048068/labels.txt → arkitscenes/raw/Training/41048068/labels.txt
✓ Downloaded: arkitscenes/raw/Training/41048068/labels.txt
Downloading https://huggingface.co/datasets/labelmaker/arkit_labelmaker/resolve/main/Training/41048068/point_lifted_mesh.ply → arkitscenes/raw/Training/41048068/point_lifted_mesh.ply
✓ Downloaded: arkitscenes/raw/Training/41048068/point_lifted_mesh.ply
🎉 Finished downloading scene's labelmaker: 41048068
Found 6914 PNG frames in arkitscenes/raw/Training/41048068/vga_wide
[INFO] Global snapped roll for video 41048068: -90
Image: 41048068_3064.459.png → Extracted TS: 3064.459 → Closest Pose TS: 3065.076
Error in scene 41048068, fr

AttributeError: module 'init_arkit' has no attribute 'run_molmo'