<a href="https://colab.research.google.com/github/SagRagg/Capstone-Project-9785-S2-25/blob/Skeletal-Tracking/MediaPipe_Tracking_Test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. Imports and Installs


In [1]:
!pip -q install mediapipe opencv-python-headless requests certifi

import base64
import json
import os
import subprocess

import certifi
import cv2
import mediapipe as mp
import requests
from google.colab import output
from IPython.display import HTML, Javascript, display
from mediapipe.framework.formats import landmark_pb2

## 2. Get Pose Model


In [2]:
# Remote location of the pose-landmarker model bundle ("full" variant, float16)
# and the local filename it is stored under.
MODEL_URL = (
    "https://storage.googleapis.com/mediapipe-models/pose_landmarker/"
    "pose_landmarker_full/float16/latest/pose_landmarker_full.task"
)
MODEL_PATH = "pose_landmarker_full.task"

def ensure_model(url, path):
    """Return ``path``, downloading the file from ``url`` first if it is missing.

    The body is streamed into a sibling ``<path>.part`` file and renamed onto
    ``path`` only after the download has finished, so an interrupted download
    never leaves a truncated file at ``path`` that a later call would treat as
    an already-downloaded model.

    Args:
        url: HTTP(S) address of the file to fetch.
        path: Local destination. If it already exists, nothing is downloaded.

    Returns:
        ``path``, unchanged.

    Raises:
        requests.HTTPError: the server replied with an error status
            (raised by ``raise_for_status``).
    """
    if os.path.exists(path):
        return path

    partial_path = f"{path}.part"
    try:
        # stream=True keeps the whole file from being buffered in memory.
        with requests.get(url, timeout=60, verify=certifi.where(), stream=True) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, path)
    finally:
        # Only present here if the download or the rename failed part-way.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    return path

# Make sure the model file is present locally (ensure_model returns the path
# it was given) and report where it lives.
print("Model ready:", ensure_model(MODEL_URL, MODEL_PATH))

Model ready: pose_landmarker_full.task


## 3. Functions


In [3]:
def draw_landmarks_on_image(bgr, detection_result):
    """Return a copy of ``bgr`` with each detected pose drawn on it.

    Args:
        bgr: Image to annotate. Drawing is done on a copy obtained via
            ``bgr.copy()``; ``bgr`` itself is not passed to the drawing call.
        detection_result: Object exposing ``pose_landmarks``: an iterable with
            one sequence of landmarks per detected pose. Each landmark must
            provide ``x``, ``y`` and ``z``; ``visibility`` is optional.

    Returns:
        The annotated copy. With no poses in ``detection_result`` this is an
        unmodified copy of ``bgr``.
    """
    annotated = bgr.copy()
    for pose_landmarks in detection_result.pose_landmarks:
        # Repackage the landmarks as a NormalizedLandmarkList protobuf, which
        # is the form handed to drawing_utils.draw_landmarks below.
        nlm_list = landmark_pb2.NormalizedLandmarkList()
        for lm in pose_landmarks:
            nlm = nlm_list.landmark.add()
            nlm.x, nlm.y, nlm.z = lm.x, lm.y, lm.z
            # The attribute can exist yet hold None; only copy a real value,
            # since None cannot be assigned to the numeric field.
            visibility = getattr(lm, "visibility", None)
            if visibility is not None:
                nlm.visibility = visibility
        mp.solutions.drawing_utils.draw_landmarks(
            annotated, nlm_list, mp.solutions.pose.POSE_CONNECTIONS)
    return annotated

def process_video(in_path, out_path="pose_output.mp4", max_people=1, min_conf=0.5):
    """Run pose landmarking over a video and write an annotated copy.

    Every frame of ``in_path`` is passed to a MediaPipe ``PoseLandmarker`` in
    VIDEO running mode (model loaded from ``MODEL_PATH``). Frames with at least
    one detected pose are annotated with ``draw_landmarks_on_image``; all
    frames are written to ``out_path`` using the ``mp4v`` codec at the input's
    frame size and frame rate.

    Args:
        in_path: Path of the video to read.
        out_path: Path of the annotated video to write.
        max_people: Passed as ``num_poses`` to the landmarker options.
        min_conf: Used for the detection, presence and tracking confidence
            thresholds alike.

    Returns:
        ``out_path``.

    Raises:
        RuntimeError: the input video could not be opened, or the output
            video could not be opened for writing.
    """
    vision = mp.tasks.vision
    options = vision.PoseLandmarkerOptions(
        base_options=mp.tasks.BaseOptions(model_asset_path=MODEL_PATH),
        running_mode=vision.RunningMode.VIDEO,
        num_poses=max_people,
        min_pose_detection_confidence=min_conf,
        min_pose_presence_confidence=min_conf,
        min_tracking_confidence=min_conf,
    )

    cap = cv2.VideoCapture(in_path)
    writer = None
    try:
        if not cap.isOpened():
            raise RuntimeError("Could not open input video.")

        # A reported frame rate of 0 is falsy, so fall back to 30 fps.
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(out_path, fourcc, fps, (w, h))
        if not writer.isOpened():
            raise RuntimeError("Could not open output video for writing.")

        with vision.PoseLandmarker.create_from_options(options) as landmarker:
            frame_idx = 0
            while True:
                ok, frame_bgr = cap.read()
                if not ok:
                    break
                frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
                # Derive the timestamp from the frame index rather than
                # accumulating floats, so rounding error cannot build up.
                ts_ms = int(frame_idx * 1000.0 / fps)
                result = landmarker.detect_for_video(mp_image, ts_ms)
                if result.pose_landmarks:
                    frame_bgr = draw_landmarks_on_image(frame_bgr, result)
                writer.write(frame_bgr)
                frame_idx += 1
    finally:
        # Release both handles even when landmarker creation or a frame fails.
        cap.release()
        if writer is not None:
            writer.release()
    return out_path

def show_video_inline(path, width=720):
    """Build an inline HTML ``<video>`` player for a local video file.

    The file's bytes are base64-encoded and embedded in the element as a
    ``data:video/mp4`` URL, so the whole file is read into memory.

    Args:
        path: Path of the video file to embed.
        width: Value of the ``width`` attribute of the ``<video>`` element.

    Returns:
        An ``IPython.display.HTML`` object wrapping the ``<video>`` element.
    """
    # Context manager so the file handle is closed as soon as the bytes are read.
    with open(path, 'rb') as video_file:
        encoded = base64.b64encode(video_file.read()).decode()
    data_url = "data:video/mp4;base64," + encoded
    return HTML(f'<video controls playsinline loop src="{data_url}" width="{width}"></video>')

def record_to_mp4(seconds=5, out_mp4="webcam.mp4", show=True):
    """Record a webcam clip in the browser and save it as an MP4 file.

    A JavaScript helper is injected into the notebook output and invoked
    through ``google.colab.output.eval_js``. It records WebM with
    ``MediaRecorder`` and returns the clip as a base64 string inside a JSON
    document. The WebM is written to ``webcam.webm`` in the working directory,
    converted to H.264 MP4 by running the ``ffmpeg`` executable, and then
    deleted, including when the conversion fails.

    Args:
        seconds: Recording length; truncated to an integer number of seconds.
        out_mp4: Path of the MP4 file to create (overwritten if present).
        show: If true, display the result inline with ``show_video_inline``.

    Returns:
        ``out_mp4``.

    Raises:
        RuntimeError: the browser reported an error while recording.
        subprocess.CalledProcessError: ``ffmpeg`` exited with a non-zero status.
    """
    js = Javascript(r"""
    async function record(sec) {
      try {
        const stream = await navigator.mediaDevices.getUserMedia({video: true, audio: false});

        // Prefer WebM (widely supported by MediaRecorder). We'll convert to MP4 in Python.
        const mime =
          (MediaRecorder.isTypeSupported('video/webm;codecs=vp9') && 'video/webm;codecs=vp9') ||
          (MediaRecorder.isTypeSupported('video/webm;codecs=vp8') && 'video/webm;codecs=vp8') ||
          'video/webm';

        const rec = new MediaRecorder(stream, { mimeType: mime });
        const chunks = [];
        rec.ondataavailable = e => { if (e.data && e.data.size) chunks.push(e.data); };
        rec.start();

        await new Promise(r => setTimeout(r, sec * 1000));
        rec.stop();
        await new Promise(r => rec.onstop = r);
        stream.getTracks().forEach(t => t.stop());

        const blob = new Blob(chunks, {type: mime});

        // Use FileReader to get a base64 string safely (no huge argument spreads).
        const b64 = await new Promise((resolve, reject) => {
          const reader = new FileReader();
          reader.onload = () => {
            // reader.result is like "data:video/webm;base64,AAAA..."
            const res = reader.result;
            const comma = res.indexOf(',');
            resolve(res.slice(comma + 1));  // just the base64 payload
          };
          reader.onerror = () => reject(reader.error || new Error('readAsDataURL failed'));
          reader.readAsDataURL(blob);
        });

        return JSON.stringify({mime, b64});
      } catch (e) {
        return JSON.stringify({error: (e && e.message) ? e.message : String(e)});
      }
    }
    """)
    display(js)
    result = output.eval_js(f"record({int(seconds)})")
    obj = json.loads(result)
    if "error" in obj:
        raise RuntimeError("Browser recording error: " + obj["error"])

    tmp_webm = "webcam.webm"
    # Convert WebM -> MP4 (H.264, yuv420p, moov atom moved to the front).
    cmd = ["ffmpeg", "-loglevel", "error", "-y",
           "-i", tmp_webm, "-c:v", "libx264", "-pix_fmt", "yuv420p",
           "-movflags", "+faststart", out_mp4]
    try:
        with open(tmp_webm, "wb") as f:
            f.write(base64.b64decode(obj["b64"]))
        subprocess.run(cmd, check=True)
    finally:
        # Remove the intermediate file whether or not the conversion succeeded.
        if os.path.exists(tmp_webm):
            os.remove(tmp_webm)

    print(f"Saved: {out_mp4}")
    if show:
        display(show_video_inline(out_mp4))
    return out_mp4


## 4. Record Yourself and Save Video

In [4]:
# Record a 5-second clip from the webcam and save it as MP4 (webcam.mp4).
# The clip is handed back from the browser as a single base64 string, which
# avoids a JavaScript stack overflow on larger recordings.
# record_to_mp4 calls json.loads and subprocess.run, so those modules are
# imported here to be in scope when the function runs.
from IPython.display import Javascript, display, HTML
from google.colab import output
import base64, json, os, subprocess

# Start the recording; with show=True the saved clip is also displayed inline.
# mp4_path holds the path of the saved file.
mp4_path = record_to_mp4(seconds=5, out_mp4="webcam.mp4", show=True)

<IPython.core.display.Javascript object>

Saved: webcam.mp4


## 5. Apply The Model

In [5]:
# Input clip to analyse and the file the annotated copy is written to.
input_mp4 = "webcam.mp4"          # <- change if your file has a different name
output_mp4 = "pose_output.mp4"

# Track a single pose with all confidence thresholds at 0.5, then show the
# annotated video inline (the last expression is the cell's displayed output).
out_path = process_video(input_mp4, output_mp4, max_people=1, min_conf=0.5)
show_video_inline(out_path)