In [None]:
# Install core dependencies
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Install ONNX and supporting libraries
!pip install onnxruntime==1.13.1
!pip install onnxruntime-gpu==1.13.1

# Install utility libraries
!pip install matplotlib==3.6.2
!pip install opencv-python==4.7.0.68
!pip install scipy==1.11.4
!pip install scikit-image==0.19.3
!pip install huggingface_hub==0.20.1

# Install DWPose (ONNX-based version)
!pip install dwpose


Looking in indexes: https://download.pytorch.org/whl/cu118
INFO: pip is looking at multiple versions of torch to determine which version is compatible with other requirements. This could take a while.
Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.7.1%2Bcu118-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (28 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading https://download.pytorch.org/whl/sympy-1.13.3-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.8.89 (from torch)
  Downloading https://download.pytorch.org/whl/cu118/nvidia_cuda_nvrtc_cu11-11.8.89-py3-none-manylinux1_x86_64.whl (23.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.2/23.2 MB[0m [31m94.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu11==11.8.89 (from torch)
  Downloading https://download.pytorch.org/whl/cu118/nvidia_cuda_runtime_cu11-11.8.89-py3-none-manylinux1_x86_64.whl (875 kB)
[2K     [90m━━━━━━━━━━━━━━━━

Collecting opencv-python==4.7.0.68
  Downloading opencv_python-4.7.0.68-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Reason for being yanked: Deprecated, use 4.7.0.71[0m[33m
[0mDownloading opencv_python-4.7.0.68-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (61.8 MB)
[2K   [91m━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/61.8 MB[0m [31m165.0 MB/s[0m eta [36m0:00:01[0m[31mERROR: Operation cancelled by user[0m[31m
[0m^C


In [None]:
# Pin the CPU and GPU builds of ONNX Runtime to the same release (1.17.1).
!pip install onnxruntime==1.17.1
!pip install onnxruntime-gpu==1.17.1


In [None]:
# Authenticate the current Colab user.
# NOTE(review): presumably this supplies the credentials picked up by the
# google.cloud.storage client created later in the notebook, which is
# constructed without explicit credentials — confirm.
from google.colab import auth
auth.authenticate_user()


In [None]:
from dwpose import DwposeDetector
from PIL import Image
import numpy as np
import json
import os
from tqdm import tqdm
from google.cloud import storage

# ==== CONFIG ====
# GCS side: frames are listed under <bucket_name>/<gcs_input_root>/ and the
# resulting JSON files are uploaded under <bucket_name>/<gcs_output_root>/.
bucket_name = "ucftoy-dataset"
gcs_input_root = "clips_10s_video1"
gcs_output_root = "pose_jsons"

# Local side: downloaded frames go under local_input_root, generated JSON
# files go under output_root.
local_input_root = "/tmp/clips"
output_root = "/content/pose_jsons"

# ==== Setup ====
# Create both local working directories (no error if they already exist).
for _local_dir in (local_input_root, output_root):
    os.makedirs(_local_dir, exist_ok=True)

# Storage client plus a handle on the bucket used for uploads.
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)

# ==== Load DWPose ====
# Detector built from the package's default pretrained configuration.
model = DwposeDetector.from_pretrained_default()

# ==== Helper ====
def get_bbox_center(bbox):
    """Return the centre point of a bounding box as a 2-element NumPy array.

    Args:
        bbox: A 4-item sequence unpacked as (x, y, w, h); the centre is
            computed as (x + w / 2, y + h / 2).

    Returns:
        np.ndarray of shape (2,) holding [center_x, center_y].
    """
    origin_x, origin_y, width, height = bbox
    center_x = origin_x + width / 2
    center_y = origin_y + height / 2
    return np.array([center_x, center_y])

# ==== Discover clip folders in GCS ====
# List every object under the input root. Any object whose name contains more
# than one "/" contributes its second path segment as a clip folder name;
# the names are de-duplicated and sorted.
blobs = list(storage_client.list_blobs(bucket_name, prefix=f"{gcs_input_root}/"))
clip_folders = sorted({
    blob_name.split("/")[1]
    for blob_name in (b.name for b in blobs)
    if blob_name.count("/") > 1
})

# ==== Process each clip ====
# For every clip folder: download its .jpg frames from GCS, run DWPose on each
# frame, normalise the selected person's body keypoints, then write one JSON
# file per frame locally and upload it to GCS.

# Keypoint-index pairs used for the relative-motion vectors. Defined once,
# since the list is identical for every frame.
# NOTE(review): these indices, and the shoulder (5, 6) / hip (11, 12) indices
# used below, look like COCO-17 keypoint ordering — confirm that the
# detector's "pose_keypoints_2d" uses the same ordering.
edges = [
    (5, 7), (7, 9), (6, 8), (8, 10),
    (11, 13), (13, 15), (12, 14), (14, 16),
    (5, 6), (11, 12), (5, 11), (6, 12)
]


def _detect_people(image):
    """Run the DWPose model on `image` and return the "people" list of its JSON output.

    Returns an empty list when the JSON output has no "people" key.
    """
    _, keypoints_json, _ = model(
        image, include_hand=True, include_face=True, include_body=True,
        image_and_json=True, detect_resolution=512
    )
    return keypoints_json.get("people", [])


def _closest_person_to_center(people, image):
    """Return the person whose bbox centre is nearest the centre of `image`.

    People without a "bbox" entry are ignored; returns None when no person
    has one.
    NOTE(review): confirm the detector's JSON actually carries a "bbox" entry
    per person — if it does not, every frame is reported as having no valid
    bbox and is skipped.
    """
    image_center = np.array([image.width / 2, image.height / 2])
    best_person = None
    min_dist = float("inf")
    for person in people:
        bbox = person.get("bbox")
        if bbox is None:
            continue
        dist = np.linalg.norm(get_bbox_center(bbox) - image_center)
        if dist < min_dist:
            min_dist = dist
            best_person = person
    return best_person


for clip_folder in clip_folders:
    gcs_clip_path = f"{gcs_input_root}/{clip_folder}"
    local_clip_path = os.path.join(local_input_root, clip_folder)
    output_clip_folder = os.path.join(output_root, clip_folder)
    os.makedirs(local_clip_path, exist_ok=True)
    os.makedirs(output_clip_folder, exist_ok=True)

    # === Download frames from GCS ===
    print(f"Downloading {clip_folder}...")
    # The trailing "/" restricts the listing to this clip's folder; without it
    # a prefix such as ".../clip_1" would also match ".../clip_10/...".
    clip_blobs = storage_client.list_blobs(bucket_name, prefix=gcs_clip_path + "/")
    frame_blobs = sorted(
        (b for b in clip_blobs if b.name.endswith(".jpg")), key=lambda b: b.name
    )
    local_frame_paths = []

    for frame_blob in frame_blobs:
        local_path = os.path.join(local_clip_path, os.path.basename(frame_blob.name))
        frame_blob.download_to_filename(local_path)
        local_frame_paths.append(local_path)

    # === Run pose estimation per frame ===
    for frame_path in tqdm(local_frame_paths, desc=f"Processing {clip_folder}"):
        try:
            # convert() returns a new image, so the file handle can be closed right away.
            with Image.open(frame_path) as raw_img:
                img = raw_img.convert("RGB")

            # Step 1: detect people in the full image.
            people = _detect_people(img)
            selected_img = img

            # Step 2: when several people are found, redo the detection on the
            # left half of the frame only. With zero or one person the
            # full-image result is reused instead of repeating the same call.
            if len(people) > 1:
                img_width, img_height = img.size
                selected_img = img.crop((0, 0, img_width // 2, img_height))
                people = _detect_people(selected_img)

            if not people:
                print(f"No person detected in {frame_path}")
                continue

            # Step 3: pick person closest to center
            best_person = _closest_person_to_center(people, selected_img)
            if best_person is None:
                print(f"No valid bbox in {frame_path}")
                continue

            # Flat list reshaped into rows of three values; only the first two
            # columns (taken as x, y) are used below.
            pose = np.array(best_person["pose_keypoints_2d"]).reshape(-1, 3)
            pose_xy = pose[:, :2]

            # Normalise: origin at the midpoint of keypoints 11 and 12, scale by
            # the distance between keypoints 5 and 6.
            hip_center = (pose_xy[11] + pose_xy[12]) / 2
            shoulder_span = np.linalg.norm(pose_xy[5] - pose_xy[6])
            if shoulder_span == 0:
                print(f"Skipped due to zero shoulder span: {frame_path}")
                continue

            pose_normalized = (pose_xy - hip_center) / shoulder_span

            # Vector from keypoint i to keypoint j for every edge.
            rel_pose = [(pose_normalized[j] - pose_normalized[i]).tolist() for i, j in edges]

            # === Save output ===
            frame_name = os.path.basename(frame_path)
            # splitext swaps only the extension, even if ".jpg" also occurs
            # earlier in the file name.
            json_filename = os.path.splitext(frame_name)[0] + ".json"
            local_json_path = os.path.join(output_clip_folder, json_filename)
            data = {
                "image": frame_name,
                "pose_normalized": pose_normalized.tolist(),
                "relative_pose_vectors": rel_pose,
                "edges": edges
            }

            # Save locally
            with open(local_json_path, "w") as f:
                json.dump(data, f, indent=2)
            print(f"Saved: {local_json_path}")

            # Upload to GCS
            gcs_output_path = f"{gcs_output_root}/{clip_folder}/{json_filename}"
            bucket.blob(gcs_output_path).upload_from_filename(local_json_path)
            print(f"Uploaded to GCS: {gcs_output_path}")

        except Exception as e:
            # Best-effort per frame: report the failure and carry on with the next frame.
            print(f"Error in {frame_path}: {e}")
