
# Track C – Spatial Grounding

This notebook evaluates **spatial understanding** by detecting and tracking key objects, limbs, or regions over time.  
It demonstrates:

- **Object / body-part detection** (hands, limbs, tools, regions)
- **Pose tracking** using [MediaPipe Pose](https://developers.google.com/mediapipe/solutions/vision/pose)
- **Spatial grounding & overlays** (bounding boxes, keypoints)
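- **Frame-to-frame IoU and positional stability metrics** (stability = mean centroid displacement in pixels between consecutive frames; lower is steadier)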

### Outputs
- Annotated video with bounding boxes & pose skeletons (`trackC_outputs/spatial_grounding_annotated.mp4`)
- CSV file of IoU & stability metrics (`trackC_outputs/trackC_spatial_metrics.csv`)


In [None]:

# --- Setup
!pip install mediapipe ultralytics opencv-python pandas numpy tqdm -q


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m53.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m66.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.0/63.0 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m78.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.9/294.9 kB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m77.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.2/81.2 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Refresh the packaging tools before touching the numeric stack.
!pip install --upgrade pip setuptools wheel
# Remove the existing numpy / opencv-python / pandas installations ...
!pip uninstall -y numpy opencv-python pandas
# ... and install the exact versions this notebook is run against.
!pip install numpy==1.26.4 opencv-python==4.9.0.80 pandas==2.2.2

Collecting pip
  Downloading pip-25.3-py3-none-any.whl.metadata (4.7 kB)
Collecting setuptools
  Downloading setuptools-80.9.0-py3-none-any.whl.metadata (6.6 kB)
Downloading pip-25.3-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m73.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading setuptools-80.9.0-py3-none-any.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m70.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: setuptools, pip
  Attempting uninstall: setuptools
    Found existing installation: setuptools 75.2.0
    Uninstalling setuptools-75.2.0:
      Successfully uninstalled setuptools-75.2.0
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour i

Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Successfully uninstalled numpy-2.0.2
Found existing installation: opencv-python 4.12.0.88
Uninstalling opencv-python-4.12.0.88:
  Successfully uninstalled opencv-python-4.12.0.88
Found existing installation: pandas 2.2.2
Uninstalling pandas-2.2.2:
  Successfully uninstalled pandas-2.2.2
Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting opencv-python==4.9.0.80
  Downloading opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting pandas==2.2.2
  Downloading pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m119.0 MB/s[0m  [33m0:00:00[0m
[?25hDownloading opencv_pytho

In [None]:
# Install the pinned numpy / opencv-python / pandas together with mediapipe 0.10.13;
# ultralytics and tqdm are requested without a version pin.
!pip install numpy==1.26.4 opencv-python==4.9.0.80 pandas==2.2.2 mediapipe==0.10.13 ultralytics tqdm

Collecting mediapipe==0.10.13
  Downloading mediapipe-0.10.13-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting ultralytics
  Downloading ultralytics-8.3.228-py3-none-any.whl.metadata (37 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe==0.10.13)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe==0.10.13)
  Downloading sounddevice-0.5.3-py3-none-any.whl.metadata (1.6 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
INFO: pip is looking at multiple versions of jax to determine which version is compatible with other requirements. This could take a while.
Collecting jax (from mediapipe==0.10.13)
  Downloading jax-0.8.0-py3-none-any.whl.metadata (13 kB)
Collecting jaxlib (from mediapipe==0.10.13)
  Downloading jaxlib-0.8.0-cp312-cp312-manylinux_2_27_x86_64.whl.metadata (1.3 kB)
Collecting 

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from ultralytics import YOLO
from IPython.display import Video, display

# Pose estimator: static_image_mode=False, with 0.5 thresholds for both
# detection and tracking confidence.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(
    static_image_mode=False,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Object detector: the small YOLOv8 "n" checkpoint. If loading fails for any
# reason, a warning is printed and `yolo` stays None.
yolo = None
try:
    yolo = YOLO("yolov8n.pt")
except Exception as load_error:
    print("Warning: YOLOv8 not found or incompatible:", load_error)

# Directory that receives the annotated video and the metrics CSV.
os.makedirs("trackC_outputs", exist_ok=True)

NameError: name 'core' is not defined

In [None]:
# Uninstall potentially conflicting packages:
# the detector stack (mediapipe, ultralytics, ultralytics-thop), the numeric stack
# (numpy, opencv-python, pandas) and the jax / jaxlib / protobuf / sounddevice
# packages, so that the following cell reinstalls them from scratch.
!pip uninstall -y mediapipe numpy opencv-python pandas jax jaxlib protobuf ultralytics ultralytics-thop sounddevice

Found existing installation: mediapipe 0.10.13
Uninstalling mediapipe-0.10.13:
  Successfully uninstalled mediapipe-0.10.13
Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: opencv-python 4.9.0.80
Uninstalling opencv-python-4.9.0.80:
  Successfully uninstalled opencv-python-4.9.0.80
Found existing installation: pandas 2.2.2
Uninstalling pandas-2.2.2:
  Successfully uninstalled pandas-2.2.2
Found existing installation: jax 0.7.1
Uninstalling jax-0.7.1:
  Successfully uninstalled jax-0.7.1
Found existing installation: jaxlib 0.7.1
Uninstalling jaxlib-0.7.1:
  Successfully uninstalled jaxlib-0.7.1
Found existing installation: protobuf 4.25.8
Uninstalling protobuf-4.25.8:
  Successfully uninstalled protobuf-4.25.8
Found existing installation: ultralytics 8.3.228
Uninstalling ultralytics-8.3.228:
  Successfully uninstalled ultralytics-8.3.228
Found existing installation: ultralytics-thop 2.0.18
Uninstall

In [None]:
# Reinstall packages with specified versions
# (numpy, opencv-python, pandas and mediapipe are pinned; ultralytics and tqdm are not).
# NOTE(review): a kernel restart is presumably needed before the import cell if any
# of these packages were already loaded in this session - TODO confirm.
!pip install numpy==1.26.4 opencv-python==4.9.0.80 pandas==2.2.2 mediapipe==0.10.13 ultralytics tqdm



In [None]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from ultralytics import YOLO
from IPython.display import Video, display

# --- Pose estimator
# static_image_mode=False; both confidence thresholds are set to 0.5.
mp_pose = mp.solutions.pose
pose_options = dict(
    static_image_mode=False,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
pose = mp_pose.Pose(**pose_options)

# --- Object detector
# Best effort: when the YOLOv8 "n" checkpoint cannot be loaded, print a warning
# and leave `yolo` as None.
yolo = None
try:
    yolo = YOLO("yolov8n.pt")
except Exception as load_error:
    print("Warning: YOLOv8 not found or incompatible:", load_error)

# --- Output directory for the annotated video and the metrics CSV
os.makedirs("trackC_outputs", exist_ok=True)



Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ━━━━━━━━━━━━ 6.2MB 355.0MB/s 0.0s


## 1. Load or capture video

In [None]:

video_path = "/content/irma_satellite_simulation.mp4"  # Replace with your file path or 0 for webcam
out_path = "trackC_outputs/spatial_grounding_annotated.mp4"

# --- Input
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Cannot open video source: {video_path}")

# Fall back to 30 FPS when the source reports no usable rate (0, negative or NaN).
fps = cap.get(cv2.CAP_PROP_FPS)
if not np.isfinite(fps) or fps <= 0:
    fps = 30.0

W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
if W <= 0 or H <= 0:
    # A writer created with a zero-sized frame cannot store anything; fail early.
    cap.release()
    raise RuntimeError(f"Video source reports an invalid frame size: {W}x{H}")

# --- Output
# Make sure the target directory exists even when this cell is run on its own.
os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer = cv2.VideoWriter(out_path, fourcc, fps, (W, H))
if not writer.isOpened():
    # cv2.VideoWriter does not raise on failure; without this check every later
    # write() would be silently dropped and the output file would be empty.
    cap.release()
    raise RuntimeError(f"Cannot open video writer for: {out_path}")

# Frame count as reported by the source; may be 0 or negative for live sources.
frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"Video loaded: {frames} frames at {fps:.1f} FPS, size=({W}x{H})")


Video loaded: 480 frames at 24.0 FPS, size=(640x480)


## 2. Process frames for spatial grounding

In [None]:

def _box_iou(box_a, box_b):
    """Return the intersection-over-union of two (x1, y1, x2, y2) boxes.

    Disjoint or degenerate boxes yield 0.0.
    """
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b
    inter_w = min(ax2, bx2) - max(ax1, bx1)
    inter_h = min(ay2, by2) - max(ay1, by1)
    if inter_w <= 0 or inter_h <= 0:
        return 0.0
    inter = inter_w * inter_h
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter
    return inter / union if union > 0 else 0.0


def _box_centroid(box):
    """Return the (x, y) centre of an (x1, y1, x2, y2) box as floats."""
    x1, y1, x2, y2 = box
    return ((x1 + x2) / 2.0, (y1 + y2) / 2.0)


# Per-frame series consumed by the metrics cell.
iou_values, stability_values = [], []
# Detections of the immediately preceding frame (empty when it had none).
prev_boxes, prev_centroids = [], []

# Read until the source is exhausted instead of trusting the reported frame
# count: that count is 0 or negative for live sources and can be off for some
# files. It is only used to size the progress bar. A live source therefore runs
# until the cell is interrupted; the series collected so far are kept.
progress = tqdm(total=frames if frames > 0 else None)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # --- Pose Detection
        pose_res = pose.process(rgb)
        if pose_res.pose_landmarks:
            # Landmark coordinates are normalised; scale them to pixel positions.
            for lm in pose_res.pose_landmarks.landmark:
                cx, cy = int(lm.x * W), int(lm.y * H)
                cv2.circle(frame, (cx, cy), 3, (0, 255, 255), -1)
            mp.solutions.drawing_utils.draw_landmarks(frame, pose_res.pose_landmarks, mp_pose.POSE_CONNECTIONS)

        # --- Object Detection (YOLO)
        boxes = []
        if yolo is not None:
            results = yolo.predict(frame, imgsz=320, verbose=False)
            for r in results:
                for box in r.boxes.xyxy.cpu().numpy():
                    # Plain Python ints keep the OpenCV drawing call and the
                    # area arithmetic free of numpy scalar types.
                    x1, y1, x2, y2 = (int(v) for v in box[:4])
                    boxes.append((x1, y1, x2, y2))
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # --- IoU Calculation (frame-to-frame overlap)
        # Each current box is scored against its best-overlapping box from the
        # previous frame. Averaging over every current/previous pair would mix
        # unrelated objects, and skipping disjoint pairs would hide boxes that
        # lost their match, so unmatched boxes count as 0.
        if prev_boxes and boxes:
            best_overlaps = [max(_box_iou(cur, old) for old in prev_boxes) for cur in boxes]
            iou_values.append(float(np.mean(best_overlaps)))

        # --- Positional stability (centroid movement)
        # Each centroid is compared with the nearest centroid of the previous
        # frame; detection order is not stable between frames, so pairing by
        # list index would compare different objects.
        centroids = [_box_centroid(b) for b in boxes]
        if centroids and prev_centroids:
            prev_points = np.asarray(prev_centroids, dtype=float)
            nearest = [
                np.linalg.norm(prev_points - np.asarray(c, dtype=float), axis=1).min()
                for c in centroids
            ]
            stability_values.append(float(np.mean(nearest)))

        # Both "previous" states are refreshed every frame, including frames with
        # no detections, so distances are never measured across a detection gap.
        prev_boxes, prev_centroids = boxes, centroids
        writer.write(frame)
        progress.update(1)
finally:
    # Always finalise the progress bar and the video files, even on error or
    # interrupt, so the output container is closed properly.
    progress.close()
    cap.release()
    writer.release()


100%|██████████| 480/480 [00:15<00:00, 30.49it/s]


## 3. Compute metrics and visualize results

In [None]:

# Collapse each per-frame series into its mean; an empty series is reported as 0.
if iou_values:
    mean_iou = np.mean(iou_values)
else:
    mean_iou = 0

if stability_values:
    mean_stability = np.mean(stability_values)
else:
    mean_stability = 0

metrics = dict(
    mean_IoU=mean_iou,
    mean_stability=mean_stability,
    num_frames=frames,
)

# One-row table, written as CSV into trackC_outputs/.
df = pd.DataFrame([metrics])
df.to_csv("trackC_outputs/trackC_spatial_metrics.csv", index=False)
print("Metrics saved:", df)

# Inline player for the annotated video.
annotated_video = Video(out_path, embed=True, width=480)
display(annotated_video)


Metrics saved:    mean_IoU  mean_stability  num_frames
0  0.995704        0.585429         480


✅ **Results Summary**

In [None]:

# Render the one-row metrics table as plain text, without the index column.
summary_text = df.to_string(index=False)
print(summary_text)


 mean_IoU  mean_stability  num_frames
 0.995704        0.585429         480
