<a href="https://colab.research.google.com/github/Hagar633/Machine-learning-/blob/main/depthmodel_coordinates_update_kidplaying.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:

import torch
import depth_pro
model, transform = depth_pro.create_model_and_transforms()
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device).eval()
model = model.half().to(device).eval()
from pickle import FRAME
from google.colab.patches import cv2_imshow
from decord import VideoReader
import mediapipe as mp
import cv2
import numpy as np
from matplotlib import pyplot as plt
import statistics

video_path = "kid1.mp4"
vr = VideoReader(video_path)
frame_count = len(vr)
print(f"Total frames: {frame_count}")
output_path = "kid1_fin.mp4"

def approximate_intrinsics(width, height, fov_x_deg=70):

    fov_x = np.deg2rad(fov_x_deg)

    fov_y = 2 * np.arctan((height/width) * np.tan(fov_x/2))

    f_x = width  / (2 * np.tan(fov_x/2))
    f_y = height / (2 * np.tan(fov_y/2))

    c_x, c_y = width/2, height/2
    return float(f_x), float(f_y), float(c_x), float(c_y)


def pixel_to_camera_coords(x, y, Z, f_x, f_y, c_x, c_y):
    """Convert pixel (x,y) + depth Z → camera 3D coordinates."""
    X = (x - c_x) * Z / f_x
    Y = (y - c_y) * Z / f_y
    return X, Y, Z


# Sample every 5 seconds (assuming 30 fps)
fps = 7
step = fps * 1
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True)
mp_draw = mp.solutions.drawing_utils

sample_frame = vr[0].asnumpy()
height, width, _ = sample_frame.shape
f_x, f_y, c_x, c_y = approximate_intrinsics(width, height, fov_x_deg=70)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
var = 0
norms = []
for i in range(0, frame_count, step):

    frame = vr[i].asnumpy()
    rgb = cv2.cvtColor(frame , cv2.COLOR_BGR2RGB)
    results = pose.process(rgb)
    img = transform(rgb).unsqueeze(0).to(device).half()
    with torch.no_grad():
        pred = model.infer(img)
        depth_map = pred["depth"].squeeze().cpu().numpy()
        depth_map = cv2.resize(depth_map, (frame.shape[1], frame.shape[0]))

    points = {}

    if results.pose_landmarks:
      h, w, _ = frame.shape
      for id, lm in enumerate(results.pose_landmarks.landmark):
        cx, cy = int(lm.x * w), int(lm.y * h)
        cz = depth_map[cy, cx]
        X, Y, Z = pixel_to_camera_coords(cx, cy, cz, f_x, f_y, c_x, c_y)
        if id == mp_pose.PoseLandmark.LEFT_ANKLE.value:
           points["left_ankle"] = (X, Y, Z)
        elif id == mp_pose.PoseLandmark.RIGHT_ANKLE.value:
            points["right_ankle"] = (X, Y, Z)
        elif id == mp_pose.PoseLandmark.LEFT_HIP.value:
            points["left_hip"] = (X, Y, Z)
        elif id == mp_pose.PoseLandmark.RIGHT_HIP.value:
            points["right_hip"] = (X, Y, Z)
        if id in [mp_pose.PoseLandmark.LEFT_ANKLE.value,
                  mp_pose.PoseLandmark.RIGHT_ANKLE.value]:
            cv2.circle(frame, (cx, cy), 5, (0, 255, 0), cv2.FILLED)

    mp_draw.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

    torso_points = [p for k,p in points.items() if "hip" in k or "shoulder" in k]
    if len(torso_points) >= 2:
        com_x = np.mean([p[0] for p in torso_points])
        com_y = np.mean([p[1] for p in torso_points])
        com_z = np.mean([p[2] for p in torso_points])
    else:
        continue

    foot_midx= (points["left_ankle"][0]+  points["right_ankle"][0])/2
    foot_midy= (points["left_ankle"][1]+  points["right_ankle"][1])/2
    foot_midz= (points["left_ankle"][2]+  points["right_ankle"][2])/2
    stance_width= abs(points["left_ankle"][0] -  points["right_ankle"][0])
    sway = abs(com_x - foot_midx)
    norm= sway/stance_width
    norms.append(norm)
    if len(norms) > 1:
      var = statistics.variance(norms)
    print("stability is" , norm)
    cv2.putText(frame,
            f"Frame: {i} | Stability: {norm:.2f} | Variance: {var:.2f}",
            (10, 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 0, 200),
            2,
            cv2.LINE_AA)
    out.write(frame)
out.release()
print(f"Video saved to {output_path}")
# cv2.circle(image, (int(com_x), int(com_y)),  5, (0, 255, 255), cv2.FILLED)
# Show result in Colab



Total frames: 223
stability is 0.004023175328563192
stability is 0.29592223841428983
stability is 0.19619568635115053
stability is 0.3900112402222423
stability is 0.0162164242139321
stability is 0.48599649392430744
stability is 0.04131629679353541
stability is 0.6460986504176065
stability is 0.008657456654358004
stability is 0.6571123300397036
stability is 0.007500819250633769
stability is 1.5386070132881042
stability is 0.01665739464720946
stability is 0.2128412424778008
stability is 0.2187201242159825
stability is 0.32547888470073494
stability is 0.17471208863015764
stability is 0.1985688497432642
stability is 0.1990204406243528
stability is 0.22635766797849008
stability is 0.347882151632619
stability is 0.023908314086395325
stability is 0.756692803361972
stability is 0.02126138754121081
stability is 0.14786960029764415
stability is 0.07697401906543215
stability is 0.07244677954298295
stability is 0.6306775167067011
stability is 0.008173685225111357
stability is 0.6776878807890432
st