In [4]:
# Inspect a saved keypoint array: dtype, shape, the first few frames and
# per-axis summary statistics.

import numpy as np
from pathlib import Path

# ─── point this at the .npy file you want to inspect ──────────────────────────
file_path = Path("Data-REHAB24-6/mp_keypoints/Ex6/PM_008-Camera17-30fps-mp.npy")
# ────────────────────────────────────────────────────────────────────────────────

arr = np.load(file_path)

# Header: where the data came from and how it is laid out
print(f"Loaded: {file_path}")
print(f" dtype: {arr.dtype}")
print(f" shape: {arr.shape}  (frames × landmarks × coords)\n")

# Preview: at most the first three frames, each with its first landmark singled out
n_show = min(3, arr.shape[0])
for frame_no, frame in enumerate(arr[:n_show]):
    print(f"Frame #{frame_no:03d} (33×3):")
    print(frame)
    print(f"  → first landmark: {tuple(frame[0])}\n")

# Summary: min / max / mean of each coordinate over every frame and landmark
print("Overall coordinate stats:")
for axis_no, axis_name in enumerate(("x", "y", "z")):
    axis_values = arr[..., axis_no]
    print(f"  {axis_name}: min={axis_values.min():.3f}, max={axis_values.max():.3f}, mean={axis_values.mean():.3f}")


Loaded: Data-REHAB24-6/mp_keypoints/Ex6/PM_008-Camera17-30fps-mp.npy
 dtype: float32
 shape: (5191, 33, 3)  (frames × landmarks × coords)

Frame #000 (33×3):
[[-0.03379065 -0.6112996  -0.22619084]
 [-0.02455677 -0.628223   -0.2275483 ]
 [-0.0257254  -0.6304051  -0.21732828]
 [-0.02547118 -0.6302204  -0.21820049]
 [-0.02778288 -0.6363151  -0.2358914 ]
 [-0.02677054 -0.63463634 -0.24706481]
 [-0.02263773 -0.61979294 -0.2281486 ]
 [ 0.02648694 -0.6184615  -0.16859515]
 [-0.03559308 -0.56201506 -0.14808732]
 [ 0.0030987  -0.59678274 -0.19447449]
 [-0.01712019 -0.55977213 -0.21580447]
 [ 0.1250833  -0.49333623 -0.08702794]
 [-0.05722423 -0.53108674 -0.02576461]
 [ 0.15178505 -0.51440114 -0.09251688]
 [-0.17247145 -0.4994753  -0.05377672]
 [ 0.14430666 -0.54461473 -0.03090633]
 [-0.32217076 -0.5845331  -0.08351779]
 [ 0.1352469  -0.5789426  -0.02217976]
 [-0.33411983 -0.62724733 -0.13406767]
 [ 0.11857966 -0.58959824 -0.04421883]
 [-0.30619952 -0.65674794 -0.14033641]
 [ 0.13962431 -0.533594

In [None]:
# generate_windows.py
import pandas as pd
import numpy as np
import mediapipe as mp
import math
from pathlib import Path

# 1. helpers --------------------------------------------------
def angle_between(a,b,c):
    """Return the angle at vertex `b`, in degrees, between the rays b->a and b->c.

    `a`, `b` and `c` are coordinate arrays of equal length. The cosine is
    clipped to [-1, 1] before `acos` so rounding error cannot push it out of
    the valid domain. The result is a Python float in [0, 180].
    """
    ray_to_a = a - b
    ray_to_c = c - b
    length_product = np.linalg.norm(ray_to_a) * np.linalg.norm(ray_to_c)
    cosine = np.clip(np.dot(ray_to_a, ray_to_c) / length_product, -1, 1)
    return math.degrees(math.acos(cosine))

PoseLandmark = mp.solutions.pose.PoseLandmark

# Each joint angle is measured at the MIDDLE landmark of its triplet, between
# the rays pointing to the first and the last landmark. Landmarks are written
# by name here and stored as their integer PoseLandmark values.
# The insertion order of this dict fixes the order of ERR_JOINTS below.
JOINT_TRIPLETS = {
    joint: tuple(PoseLandmark[landmark].value for landmark in triplet)
    for joint, triplet in (
        ("LEFT_ELBOW",     ("LEFT_SHOULDER",  "LEFT_ELBOW",     "LEFT_WRIST")),
        ("RIGHT_ELBOW",    ("RIGHT_SHOULDER", "RIGHT_ELBOW",    "RIGHT_WRIST")),
        ("LEFT_SHOULDER",  ("LEFT_ELBOW",     "LEFT_SHOULDER",  "LEFT_HIP")),
        ("RIGHT_SHOULDER", ("RIGHT_ELBOW",    "RIGHT_SHOULDER", "RIGHT_HIP")),
        ("LEFT_HIP",       ("LEFT_SHOULDER",  "LEFT_HIP",       "LEFT_KNEE")),
        ("RIGHT_HIP",      ("RIGHT_SHOULDER", "RIGHT_HIP",      "RIGHT_KNEE")),
        ("LEFT_KNEE",      ("LEFT_HIP",       "LEFT_KNEE",      "LEFT_ANKLE")),
        ("RIGHT_KNEE",     ("RIGHT_HIP",      "RIGHT_KNEE",     "RIGHT_ANKLE")),
        # The last two are measured at the left shoulder and at the nose.
        ("SPINE",          ("LEFT_HIP",       "LEFT_SHOULDER",  "RIGHT_SHOULDER")),
        ("HEAD",           ("LEFT_SHOULDER",  "NOSE",           "RIGHT_SHOULDER")),
    )
}
ERR_JOINTS = [*JOINT_TRIPLETS]
N_ERR = len(ERR_JOINTS)  # 10

# 2. load original metadata & keypoints -----------------------
DATA_ROOT = Path("Data-REHAB24-6")
KEYPT_ROOT = DATA_ROOT.joinpath("mp_keypoints")      # holds the Ex<N> folders of keypoint .npy files
META_ORIG = DATA_ROOT.joinpath("Segmentation.xlsx")  # one row per annotated repetition

# Read the sheet, then strip stray whitespace from the column headers so the
# columns can be reached as attributes (df.exercise_id, df.correctness, ...).
df = pd.read_excel(META_ORIG, engine="openpyxl")
df.columns = df.columns.str.strip()

# 3. compute ideal_angles on correct reps ----------------------
# ideal_angles[exercise_id][joint] is the median, over all correctly performed
# repetitions of that exercise, of the joint angle (degrees, computed from the
# first two coordinates only) at the repetition's middle frame.
# Joints without any usable measurement are left out of the inner dict.
ideal_angles = {}
correct = df[df.correctness==1]
for ex in correct.exercise_id.dropna().unique():
    # Cast exactly like step 4 does, so the "Ex<N>" folder name and the dict
    # key are plain ints even if the column was read as a float dtype.
    ex = int(ex)
    all_ang = {jn:[] for jn in ERR_JOINTS}
    for _,r in correct[correct.exercise_id==ex].iterrows():
        vid, f0, f1 = r.video_id, int(r.first_frame), int(r.last_frame)
        files = list((KEYPT_ROOT/f"Ex{ex}").glob(f"{vid}-Camera17*-mp.npy"))
        if not files: continue
        arr = np.load(files[0])
        # A non-increasing frame range falls back to "from f0 to the end"
        seg = arr[f0:f1] if f1>f0 else arr[f0:]
        if len(seg)==0: continue
        frm = seg[len(seg)//2]          # middle frame of the repetition
        for jn in ERR_JOINTS:
            ia,ib,ic = JOINT_TRIPLETS[jn]
            ang = angle_between(frm[ia,:2],frm[ib,:2],frm[ic,:2])
            # Coincident landmarks make the angle NaN; skip those so a single
            # degenerate frame cannot turn the whole median into NaN.
            if not math.isnan(ang):
                all_ang[jn].append(ang)
    # median
    ideal_angles[ex] = {jn:float(np.median(vals)) for jn,vals in all_ang.items() if vals}

# 4. slide windows & write rows --------------------------------
# For every annotated repetition: compute the per-frame deviation of each joint
# angle from the exercise's ideal angle, average it over sliding windows of
# WINDOW frames (step STRIDE), and emit one CSV row per window.
WINDOW, STRIDE = 16, 8
rows = []
for _,r in df.iterrows():
    vid, ex, f0, f1 = r.video_id, int(r.exercise_id), int(r.first_frame), int(r.last_frame)
    files = list((KEYPT_ROOT/f"Ex{ex}").glob(f"{vid}-Camera17*-mp.npy"))
    if not files: continue
    arr = np.load(files[0])                # (F,33,3)
    # A non-increasing frame range falls back to "from f0 to the end"
    seg = arr[f0:f1] if f1>f0 else arr[f0:]
    if len(seg)<WINDOW: continue

    # An exercise with no correct reference repetition has no entry in
    # ideal_angles; treat it like a missing joint (error 0) instead of
    # raising KeyError. Looked up once per repetition, not once per frame.
    ex_ideal = ideal_angles.get(ex, {})

    # per-frame errors
    pf_err = {jn:[] for jn in ERR_JOINTS}
    for frm in seg:
        for jn in ERR_JOINTS:
            ia,ib,ic = JOINT_TRIPLETS[jn]
            ang = angle_between(frm[ia,:2],frm[ib,:2],frm[ic,:2])
            # Defaulting to `ang` itself yields an error of exactly 0 when no
            # ideal angle is known for this joint.
            pf_err[jn].append(ang - ex_ideal.get(jn,ang))

    # slide
    for start in range(0, len(seg)-WINDOW+1, STRIDE):
        w = np.array([ pf_err[jn][start:start+WINDOW] for jn in ERR_JOINTS ])  # (10,WINDOW)
        mean_err = w.mean(axis=1)
        row = {
            "video_id":vid,
            "exercise_id":ex,
            "repetition_number":r.repetition_number,
            # window bounds are expressed in frame indices of the full video
            "window_start": f0+start,
            "window_end":   f0+start+WINDOW,
            "correctness":  r.correctness
        }
        # err_<i> follows the order of ERR_JOINTS
        for i,val in enumerate(mean_err):
            row[f"err_{i}"] = float(val)
        rows.append(row)

win_df = pd.DataFrame(rows)
win_df.to_csv(DATA_ROOT/"Segmentation_windows.csv", index=False)
print("Wrote", len(win_df), "windows to Segmentation_windows.csv")


Wrote 14375 windows to Segmentation_windows.csv


In [2]:
# Imports (assuming previous imports are still valid)
import os
import cv2
import torch
import numpy as np
import torch.nn.functional as F
import mediapipe as mp
from collections import deque # Use deque for efficient buffer
from pathlib import Path
import pandas as pd # Needed for KeypointWindowDataset if re-defining
import torch.nn as nn # Needed for model classes if re-defining

# --- Definitions repeated here so this cell can run on its own ---
# 1. Paths & device (adjust if needed)
SCRIPT_DIR = Path().resolve()
DATA_ROOT = SCRIPT_DIR.joinpath("Data-REHAB24-6")  # Make sure this path is correct

# Backend preference: MPS first, then CUDA, otherwise CPU.
if torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
elif torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print("► Using device:", DEVICE)

# 2. Joint names & count
PoseLandmark = mp.solutions.pose.PoseLandmark
JOINT_NAMES = [landmark.name for landmark in PoseLandmark]
N_JOINTS = len(JOINT_NAMES)  # should be 33

# 3. Exercises (Ex1…Ex6)
NUM_EXERCISES = 6
CKPT_FILE = "kp_pose_quality_windows_ex.pt" # Check if this file exists

# Joints scored by the model's error head, one output per entry.
# The order must stay identical to the one used when the training windows
# were generated (generate_windows.py), because outputs are matched by position.
ERR_JOINTS = [
    "LEFT_ELBOW",
    "RIGHT_ELBOW",
    "LEFT_SHOULDER",
    "RIGHT_SHOULDER",
    "LEFT_HIP",
    "RIGHT_HIP",
    "LEFT_KNEE",
    "RIGHT_KNEE",
    "SPINE",
    "HEAD",
]
N_ERR = len(ERR_JOINTS)
# Sanity check only: warn (do not fail) when the list length drifts from 10.
if N_ERR != 10:
    print(f"Warning: N_ERR is {N_ERR}, but expected 10 based on comment. Ensure ERR_JOINTS is correct.")
# Column names of the error targets; kept for training-data preparation, unused in this cell
ERR_COLS = [f"err_{col_no}" for col_no in range(N_ERR)]

# 5. Model definitions (Using the ORIGINAL definition from training)
class KeypointEncoder(nn.Module):
    """Encode a feature vector with two Conv1d+ReLU layers and global average pooling.

    Accepts either a 2-D tensor (B, in_dim) or a 3-D tensor (B, in_dim, L) and
    returns a (B, embed) tensor. Layer names (conv1, conv2, pool) are kept as
    in the original definition so saved checkpoints still match.
    """

    def __init__(self, in_dim:int, embed:int=512):
        super().__init__()
        conv_opts = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv1d(in_dim, 128, **conv_opts)
        self.conv2 = nn.Conv1d(128, embed, **conv_opts)
        self.pool = nn.AdaptiveAvgPool1d(1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # A single-frame input (B, D) gets a trailing length-1 axis so that
        # Conv1d sees it as (B, D, 1); 3-D inputs are used as they are.
        features = x[:, :, None] if x.dim() == 2 else x
        for conv in (self.conv1, self.conv2):
            features = torch.relu(conv(features))
        pooled = self.pool(features)     # (B, embed, 1)
        return pooled.squeeze(-1)        # (B, embed)


class PoseQualityNetKP(nn.Module):
    """Window-level pose-quality model: frame encoder -> BiLSTM -> two linear heads.

    Each frame of a keypoint window is embedded by `KeypointEncoder`, the
    embedded sequence is run through a 2-layer bidirectional LSTM, the LSTM
    outputs are mean-pooled over time, concatenated with an embedding of the
    one-hot exercise vector, and fed to:
      * `cls_head` - 2 logits (index 0: incorrect, index 1: correct)
      * `err_head` - N_ERR regression outputs (N_ERR is read from module scope)

    Attribute names are kept as in the original definition so that saved
    checkpoints still match.

    Args:
        in_dim: per-frame feature size (99 = 33 * 3 for this notebook's inputs).
        num_ex: length of the one-hot exercise vector.
        hidden: LSTM hidden size per direction.
        ex_emb: size of the exercise embedding.
        embed:  size of the per-frame embedding produced by the encoder.
    """

    def __init__(self,
                 in_dim: int, # Should be 99 (33*3)
                 num_ex: int,
                 hidden: int = 256,
                 ex_emb: int = 64,
                 embed: int = 512): # Added embed dim to match encoder
        super().__init__()
        # keypoint feature extractor
        self.encoder = KeypointEncoder(in_dim, embed=embed)

        # sequence model
        self.lstm = nn.LSTM(
            input_size=embed, # must equal the encoder's output size
            hidden_size=hidden,
            num_layers=2,
            batch_first=True,
            bidirectional=True
        )
        feat_dim = hidden * 2  # forward + backward direction

        # exercise embedding MLP
        self.ex_emb = nn.Sequential(
            nn.Linear(num_ex, ex_emb),
            nn.ReLU(),
            nn.Linear(ex_emb, ex_emb)
        )

        # final heads
        self.cls_head = nn.Linear(feat_dim + ex_emb, 2) # 2 classes: incorrect, correct
        self.err_head = nn.Linear(feat_dim + ex_emb, N_ERR) # Predicts N_ERR error values

    def forward(self,
                seq:     torch.Tensor,  # (B, T, D) where D=99
                ex_1hot: torch.Tensor   # (B, num_ex)
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """Return `(logits, err_hat)` with shapes (B, 2) and (B, N_ERR)."""
        B, T, D = seq.shape

        # 1) keypoint -> per-frame embeddings.
        # All B*T frames go through the encoder in ONE batched call instead of
        # a Python loop over T; the encoder processes every row independently,
        # so folding time into the batch axis gives the same embeddings.
        frame_feats = self.encoder(seq.reshape(B * T, D))          # (B*T, embed)
        feats = frame_feats.reshape(B, T, frame_feats.shape[-1])   # (B, T, embed)

        # 2) sequence model (LSTM), then mean pooling over time
        out, _ = self.lstm(feats)                # (B, T, 2*hidden)
        g = out.mean(dim=1)                      # (B, 2*hidden)

        # 3) exercise embed
        ex_e = self.ex_emb(ex_1hot)              # (B, ex_emb)

        # 4) concat and heads
        h = torch.cat([g, ex_e], dim=1)          # (B, feat_dim + ex_emb)
        logits = self.cls_head(h)                # (B, 2)
        err_hat = self.err_head(h)               # (B, N_ERR)

        return logits, err_hat
# --- End of re-included definitions ---


# Load model
if not Path(CKPT_FILE).exists():
    # Raise instead of calling exit(): exit() ends the whole interpreter/kernel
    # session without a traceback, while an exception stops just this cell
    # and says why.
    raise FileNotFoundError(f"Error: Checkpoint file not found at {CKPT_FILE}")

print(f"Loading model from {CKPT_FILE}...")
# NOTE(security): the checkpoint is a pickled nn.Module (it is used directly as
# a model below), and unpickling can execute arbitrary code - only load files
# you created or trust.
# weights_only=False is passed explicitly so loading a full model object does
# not depend on the PyTorch version's default for this flag.
infer_model = torch.load(CKPT_FILE, map_location=DEVICE, weights_only=False)
if not isinstance(infer_model, nn.Module):
    # e.g. a state_dict was saved instead of the model object
    raise TypeError(
        f"{CKPT_FILE} contains a {type(infer_model).__name__}, expected a pickled nn.Module"
    )
infer_model.eval()
print("✅ Model loaded.")

# Exercise map: exercise ID (1-based) -> display name
EXERCISE_MAP = dict(enumerate(
    [
        "Arm abduction",
        "Arm VW",
        "Push-ups",
        "Leg abduction",
        "Leg lunge",
        "Squats",
    ],
    start=1,
))
NUM_EXERCISES = len(EXERCISE_MAP) # Ensure consistency

# Ask user for exercise ID; keep prompting until a valid ID is entered
while True:
    try:
        exercise_id_str = input(f"Enter the exercise ID you're performing (1-{len(EXERCISE_MAP)}): ")
        exercise_id = int(exercise_id_str)
    except ValueError:
        print("Invalid input. Please enter a number.")
        continue
    if not (1 <= exercise_id <= len(EXERCISE_MAP)):
        print(f"Invalid ID. Please enter a number between 1 and {len(EXERCISE_MAP)}.")
        continue
    exercise_name = EXERCISE_MAP[exercise_id]
    break

# MediaPipe Pose Setup
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# One Pose instance for the whole cell; extract_keypoints() calls it per frame.
pose = mp_pose.Pose(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
    model_complexity=2, # Match training
    static_image_mode=False,
    enable_segmentation=False,
)

# Keypoints extraction function (using world landmarks)
def extract_keypoints(frame):
    """Run the module-level `pose` estimator on one BGR frame.

    Returns:
        (keypoints, landmarks_for_drawing)
        * keypoints: float32 array with one (x, y, z) row per world landmark,
          or None when no world landmarks were produced.
        * landmarks_for_drawing: the image-space landmark object from the
          result (for overlay drawing), or None when absent.
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # The converted image is marked read-only for the duration of the call
    rgb.flags.writeable = False
    result = pose.process(rgb)
    rgb.flags.writeable = True

    world = result.pose_world_landmarks
    if world:
        # World landmarks are the model input
        keypoints = np.array(
            [(lm.x, lm.y, lm.z) for lm in world.landmark],
            dtype=np.float32,
        )
    else:
        keypoints = None

    # Image landmarks are only used for drawing
    landmarks_for_drawing = result.pose_landmarks if result.pose_landmarks else None

    return keypoints, landmarks_for_drawing

# Inference parameters
# Number of consecutive frames buffered before the model is run on them.
SEQUENCE_LENGTH = 16 # Match training window size
# Flattened per-frame feature size: three coordinates for each joint.
IN_DIM = N_JOINTS * 3 # 33 * 3 = 99

# --- MODIFIED Inference Function ---
def _rank_joint_feedback(err_values, joint_names, top_n):
    """Build "Correct your <joint>" messages for the largest-magnitude errors.

    Args:
        err_values: 1-D array of per-joint error values.
        joint_names: joint name for each position of `err_values`.
        top_n: maximum number of messages; values <= 0 yield no messages.

    Returns:
        List of messages, largest absolute error first.
    """
    if top_n <= 0:
        # A bare [-0:] slice would select every joint instead of none.
        return []
    # argsort is ascending, so reverse it to get the largest deviation first
    worst_first = np.argsort(np.abs(err_values))[::-1][:top_n]
    messages = []
    for idx in worst_first:
        if 0 <= idx < len(joint_names):
            messages.append(f"Correct your {joint_names[idx]}")
        else:
            print(f"Warning: Invalid error index {idx} encountered.")
    return messages


def infer_and_feedback(model, video_path, selected_ex_id, selected_ex_name, top_n=3):
    """Play a video, classify each sliding keypoint window and overlay feedback.

    For every frame the pose is extracted and drawn; once SEQUENCE_LENGTH
    consecutive frames with a detected pose are buffered, the model is run on
    the window for every new frame. Losing the pose empties the buffer. The
    annotated frames are shown in an OpenCV window until the video ends or
    'q' is pressed.

    Args:
        model: network called as `model(seq, ex_1hot)` returning
            `(logits, err_hat)`; assumed to already live on DEVICE.
        video_path: source passed to `cv2.VideoCapture`; `os.PathLike`
            objects are converted to a string first.
        selected_ex_id: 1-based exercise ID, one-hot encoded over NUM_EXERCISES.
        selected_ex_name: exercise name shown in the overlay.
        top_n: how many of the largest-error joints to name when the window is
            classified as incorrect (default 3, the previous fixed value).

    Side effects:
        Always releases the capture, destroys the OpenCV windows and closes
        the module-level `pose` object before returning from the playback
        loop, even if an exception occurs. Because `pose` is closed, it must
        be re-created before this function is called again.
    """
    # VideoCapture wants a plain string for files; leave other source kinds
    # (e.g. an integer device index) untouched.
    source = os.fspath(video_path) if isinstance(video_path, os.PathLike) else video_path
    cap = cv2.VideoCapture(source)
    if not cap.isOpened():
        print(f"Error: Could not open video file {video_path}")
        cap.release()
        return

    keypoints_buffer = deque(maxlen=SEQUENCE_LENGTH)
    feedback_status = "Initializing..." # Overall status (Correct/Incorrect/Collecting...)
    joint_feedback = []                 # Per-joint correction messages for the current frame
    predicted_class = -1                # -1: no prediction, 0: Incorrect, 1: Correct

    try:
        # Loop-invariant inputs: the exercise one-hot never changes during a
        # run and eval mode only has to be set once.
        ex_tensor = torch.tensor([selected_ex_id - 1], device=DEVICE)
        ex_1hot = F.one_hot(ex_tensor, num_classes=NUM_EXERCISES).float() # (1, NUM_EXERCISES)
        model.eval()

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("End of video or cannot read frame.")
                break

            world_keypoints, image_landmarks_for_drawing = extract_keypoints(frame)

            # Draw Image Landmarks if detected
            if image_landmarks_for_drawing:
                mp_drawing.draw_landmarks(
                    frame,
                    image_landmarks_for_drawing,
                    mp_pose.POSE_CONNECTIONS,
                    landmark_drawing_spec=mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
                    connection_drawing_spec=mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                )

            # --- Model Inference Logic ---
            # Joint feedback is rebuilt from scratch on every frame.
            joint_feedback = []
            if world_keypoints is None:
                # Pose lost: the window must consist of consecutive frames,
                # so start collecting again from zero.
                feedback_status = "No pose detected"
                keypoints_buffer.clear()
                predicted_class = -1
            else:
                keypoints_buffer.append(world_keypoints)

                if len(keypoints_buffer) < SEQUENCE_LENGTH:
                    feedback_status = f"Collecting frames... {len(keypoints_buffer)}/{SEQUENCE_LENGTH}"
                    predicted_class = -1 # no prediction while collecting
                else:
                    # Prepare sequence: (T, J, 3) -> (T, J*3) -> (1, T, J*3)
                    keypoints_array = np.array(keypoints_buffer, dtype=np.float32)
                    keypoints_flat = keypoints_array.reshape(SEQUENCE_LENGTH, -1)
                    seq = torch.tensor(keypoints_flat, dtype=torch.float32).unsqueeze(0).to(DEVICE)

                    with torch.no_grad():
                        logits, err_hat = model(seq, ex_1hot)
                        # Class prediction (0: incorrect, 1: correct)
                        predicted_class = logits.argmax(1).item()
                        # Drop only the batch axis so the result stays 1-D
                        err_values = err_hat.squeeze(0).cpu().numpy() # (N_ERR,)

                    if predicted_class == 1:
                        feedback_status = "Correct"
                    else:
                        feedback_status = "Incorrect"
                        # Name the joints with the largest predicted deviation
                        joint_feedback = _rank_joint_feedback(err_values, ERR_JOINTS, top_n)

            # --- Display Feedback on Frame ---
            cv2.putText(frame, f"Exercise: {selected_ex_name}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2, cv2.LINE_AA)

            # Status colour (BGR): green = correct, red = incorrect, otherwise no prediction yet
            if predicted_class == 1:
                status_color = (0, 255, 0)
            elif predicted_class == 0:
                status_color = (0, 0, 255)
            else:
                status_color = (255, 150, 0)
            cv2.putText(frame, f"Status: {feedback_status}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, status_color, 2, cv2.LINE_AA)

            # Specific joint feedback, one line per message, only when incorrect
            feedback_y_start = 90
            if predicted_class == 0 and joint_feedback:
                for i, msg in enumerate(joint_feedback):
                    cv2.putText(frame, msg, (10, feedback_y_start + i * 25), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 165, 255), 2)

            # Show the frame
            cv2.imshow('Pose Estimation Feedback', frame)

            # Exit condition
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even when inference or drawing raised
        cap.release()
        cv2.destroyAllWindows()
        pose.close()


# --- Main execution part ---
# Video to analyse (must exist on disk). Default: an Exercise 1 recording.
VIDEO_PATH = "Data-REHAB24-6/Videos/Ex1/PM_001-Camera17-30fps.mp4"
# Alternative: an Exercise 2 recording
# VIDEO_PATH = "Data-REHAB24-6/Videos/Ex2/PM_003-Camera17-30fps.mp4"

if Path(VIDEO_PATH).exists():
    # Run inference with on-screen feedback for the exercise chosen above
    infer_and_feedback(infer_model, VIDEO_PATH, exercise_id, exercise_name)
else:
    print(f"Error: Video file not found at {VIDEO_PATH}")

  infer_model = torch.load(CKPT_FILE, map_location=DEVICE)


► Using device: mps
Loading model from kp_pose_quality_windows_ex.pt...
✅ Model loaded.


I0000 00:00:1745030852.291959  946755 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M4 Max
W0000 00:00:1745030852.353390 1014573 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1745030852.375076 1014585 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
