In [1]:
# 0. Imports
from __future__ import annotations
import os, random, warnings
from pathlib import Path
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, f1_score
from tqdm import tqdm
from torch.utils.data import Subset
import torch.nn.functional as F
import mediapipe as mp


In [2]:
# 1. Paths & device
# All data locations hang off the current working directory.
SCRIPT_DIR = Path(".").resolve()
DATA_ROOT  = SCRIPT_DIR.joinpath("Data-REHAB24-6")
WIN_CSV    = DATA_ROOT.joinpath("Segmentation_windows.csv")
KEYPT_ROOT = DATA_ROOT.joinpath("mp_keypoints")

# Backend preference order: Apple MPS, then CUDA, then plain CPU.
if torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
elif torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print("► Using device:", DEVICE)

► Using device: mps


In [None]:
# 2. Joint names & count
# MediaPipe exposes its pose landmarks as an enum; the member names are used as joint names.
PoseLandmark = mp.solutions.pose.PoseLandmark

JOINT_NAMES = [landmark.name for landmark in PoseLandmark]
N_JOINTS    = len(JOINT_NAMES)  # expected to be 33

print(f"JOINT_NAMES: {JOINT_NAMES}")
print(f"N_JOINTS: {N_JOINTS}")

# Exercises (Ex1…Ex6) and the checkpoint file name
NUM_EXERCISES = 6
CKPT_FILE     = "kp_pose_quality_windows_ex.pt"

# Joints that carry a per-window error target; their order defines err_0 … err_9.
# NOTE(review): "SPINE" and "HEAD" are not MediaPipe landmark names (they do not
# appear in JOINT_NAMES) — presumably labels from the dataset annotations; confirm.
ERR_JOINTS = [
    "LEFT_ELBOW",
    "RIGHT_ELBOW",
    "LEFT_SHOULDER",
    "RIGHT_SHOULDER",
    "LEFT_HIP",
    "RIGHT_HIP",
    "LEFT_KNEE",
    "RIGHT_KNEE",
    "SPINE",
    "HEAD",
]
N_ERR    = len(ERR_JOINTS)   # 10
ERR_COLS = [f"err_{k}" for k, _ in enumerate(ERR_JOINTS)]


JOINT_NAMES: ['NOSE', 'LEFT_EYE_INNER', 'LEFT_EYE', 'LEFT_EYE_OUTER', 'RIGHT_EYE_INNER', 'RIGHT_EYE', 'RIGHT_EYE_OUTER', 'LEFT_EAR', 'RIGHT_EAR', 'MOUTH_LEFT', 'MOUTH_RIGHT', 'LEFT_SHOULDER', 'RIGHT_SHOULDER', 'LEFT_ELBOW', 'RIGHT_ELBOW', 'LEFT_WRIST', 'RIGHT_WRIST', 'LEFT_PINKY', 'RIGHT_PINKY', 'LEFT_INDEX', 'RIGHT_INDEX', 'LEFT_THUMB', 'RIGHT_THUMB', 'LEFT_HIP', 'RIGHT_HIP', 'LEFT_KNEE', 'RIGHT_KNEE', 'LEFT_ANKLE', 'RIGHT_ANKLE', 'LEFT_HEEL', 'RIGHT_HEEL', 'LEFT_FOOT_INDEX', 'RIGHT_FOOT_INDEX']
N_JOINTS: 33


In [4]:
# DATASET
class KeypointWindowDataset(Dataset):
    """Windowed keypoint sequences with a correctness label and per-joint error targets.

    Every row of ``csv_file`` describes one window. The columns read are
    ``video_id``, ``repetition_number``, ``window_start``, ``window_end``,
    ``exercise_id``, ``correctness`` and ``err_0`` … ``err_{N_ERR-1}``.
    Keypoints for a row are loaded from
    ``<keypt_root>/Ex<exercise_id>/<video_id>-Camera17*-mp.npy``.

    Args:
        csv_file: Path of the window CSV.
        keypt_root: Directory containing the per-exercise ``Ex<k>`` folders.
    """

    def __init__(self, csv_file: Path, keypt_root: Path):
        df = pd.read_csv(csv_file)
        df = df.sort_values(["video_id","repetition_number","window_start"])
        self.rows = df.to_dict("records")
        self.keypt_root = Path(keypt_root)
        # (exercise_id, video_id) -> resolved .npy path, so the directory is
        # globbed once per video rather than once per window.
        self._path_cache = {}

    def __len__(self):
        return len(self.rows)

    def _keypoint_path(self, ex_id: int, vid) -> Path:
        """Return the keypoint file for a video, raising FileNotFoundError if absent."""
        key = (ex_id, vid)
        cached = self._path_cache.get(key)
        if cached is not None:
            return cached

        folder  = self.keypt_root / f"Ex{ex_id}"
        pattern = f"{vid}-Camera17*-mp.npy"
        # Sorted so the choice is deterministic when several files match.
        matches = sorted(folder.glob(pattern))
        if not matches:
            # A bare next() on the glob would raise StopIteration here, which a
            # surrounding for-loop (e.g. over a DataLoader) treats as a normal
            # end of iteration and silently truncates the epoch.
            raise FileNotFoundError(
                f"No keypoint file matching '{pattern}' in {folder}"
            )
        self._path_cache[key] = matches[0]
        return matches[0]

    def __getitem__(self, i: int):
        """Return ``(seq, label, err, ex)`` for window ``i``.

        Returns:
            seq:   float32 tensor of shape (T, features), one flattened frame per row.
            label: int64 scalar tensor taken from ``correctness``.
            err:   float32 tensor of length ``N_ERR``.
            ex:    zero-based exercise index (Python int).

        Raises:
            FileNotFoundError: no keypoint file exists for the row's video.
            ValueError: the window selects no frames from the keypoint file.
        """
        r   = self.rows[i]
        ex  = int(r["exercise_id"]) - 1      # zero-based [0..NUM_EXERCISES-1]
        vid = r["video_id"]
        f0, f1 = int(r["window_start"]), int(r["window_end"])

        path = self._keypoint_path(ex + 1, vid)
        # Memory-map the file and copy out only the requested frames instead of
        # reading the whole recording for every window.
        frames = np.load(path, mmap_mode="r")
        seg = np.array(frames[f0:f1], dtype=np.float32)   # (T, joints, coords)
        if seg.shape[0] == 0:
            raise ValueError(
                f"Window [{f0}:{f1}] selects no frames from {path} "
                f"({frames.shape[0]} frames available)"
            )
        seg = seg.reshape(seg.shape[0], -1)                # (T, joints*coords)
        seq = torch.from_numpy(seg)

        label = torch.tensor(r["correctness"], dtype=torch.long)
        err   = torch.tensor([r[f"err_{j}"] for j in range(N_ERR)],
                             dtype=torch.float32)

        return seq, label, err, ex

In [5]:
# 5. Model definitions
class KeypointEncoder(nn.Module):
    """Encode one flattened keypoint frame into an ``embed``-dimensional vector.

    The input features are placed on the channel axis of two ReLU-activated
    ``Conv1d`` layers (kernel 3, padding 1) operating on a length-1 sequence,
    followed by adaptive average pooling down to a single position.
    """

    def __init__(self, in_dim:int, embed:int=512):
        super().__init__()
        conv_opts = {"kernel_size": 3, "padding": 1}
        self.conv1 = nn.Conv1d(in_dim, 128, **conv_opts)
        self.conv2 = nn.Conv1d(128, embed, **conv_opts)
        self.pool  = nn.AdaptiveAvgPool1d(1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # (B, D) → (B, D, 1): features become Conv1d channels.
        hidden = x[:, :, None]
        for conv in (self.conv1, self.conv2):
            hidden = torch.relu(conv(hidden))
        pooled = self.pool(hidden)         # (B, embed, 1)
        return pooled.squeeze(-1)          # (B, embed)

class PoseQualityNetKP(nn.Module):
    """Pose-quality model over keypoint windows, conditioned on the exercise.

    Pipeline: per-frame ``KeypointEncoder`` → 2-layer bidirectional LSTM →
    mean over time → concatenation with an MLP embedding of the one-hot
    exercise vector → two linear heads (2 class logits, ``N_ERR`` error values).

    Args:
        in_dim: Number of features per frame.
        num_ex: Length of the one-hot exercise vector.
        hidden: LSTM hidden size per direction.
        ex_emb: Width of the exercise embedding.
        embed:  Width of the per-frame encoding. It is passed to both the
                encoder and the LSTM so the two cannot drift apart.
    """

    def __init__(self,
                 in_dim: int,
                 num_ex: int,
                 hidden: int = 256,
                 ex_emb: int = 64,
                 embed: int = 512):
        super().__init__()
        # keypoint feature extractor
        self.encoder = KeypointEncoder(in_dim, embed)

        # sequence model; its input width must equal the encoder output width
        self.lstm = nn.LSTM(
            input_size=embed,
            hidden_size=hidden,
            num_layers=2,
            batch_first=True,
            bidirectional=True
        )
        feat_dim = hidden * 2   # forward + backward directions

        # exercise embedding MLP
        self.ex_emb = nn.Sequential(
            nn.Linear(num_ex, ex_emb),
            nn.ReLU(),
            nn.Linear(ex_emb, ex_emb)
        )

        # final heads
        self.cls_head = nn.Linear(feat_dim + ex_emb, 2)
        self.err_head = nn.Linear(feat_dim + ex_emb, N_ERR)

    def forward(self,
                seq:     torch.Tensor,  # (B, T, D)
                ex_1hot: torch.Tensor   # (B, num_ex)
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """Return ``(class_logits, error_predictions)`` of shapes (B, 2) and (B, N_ERR)."""
        # 1) keypoint → sequence feats
        # The encoder treats frames independently, so all B*T frames are
        # encoded in one call instead of a Python loop over T.
        B, T, D = seq.shape
        feats = self.encoder(seq.reshape(B * T, D))   # (B*T, embed)
        feats = feats.reshape(B, T, -1)               # (B, T, embed)
        out, _ = self.lstm(feats)                     # (B, T, 2*hidden)
        g = out.mean(1)                               # (B, 2*hidden)

        # 2) exercise embed
        ex_e = self.ex_emb(ex_1hot)                   # (B, ex_emb)

        # 3) concat and heads
        h = torch.cat([g, ex_e], dim=1)               # (B, feat_dim+ex_emb)
        return self.cls_head(h), self.err_head(h)


In [6]:
# Training
def train_epochs(
    csv_file:  str   = str(WIN_CSV),
    keypt_root:str   = str(KEYPT_ROOT),
    num_ex:    int   = NUM_EXERCISES,
    epochs:    int   = 30,
    batch:     int   = 16,
    lr:        float = 1e-4,
    ckpt_file: str   = CKPT_FILE,
    seed:      int | None = 42,
):
    """Train ``PoseQualityNetKP`` on keypoint windows and checkpoint the best-F1 model.

    Windows are shuffled and split 70 % train / 15 % validation; the remaining
    15 % is left untouched as a held-out test set. With a fixed ``seed`` that
    test set can be rebuilt later as
    ``np.random.default_rng(seed).permutation(len(ds))[int(0.85 * len(ds)):]``.

    NOTE(review): the split is per window, so windows of the same video and
    repetition can land in both train and validation.

    Args:
        csv_file:   Window CSV consumed by ``KeypointWindowDataset``.
        keypt_root: Root directory of the keypoint ``.npy`` files.
        num_ex:     Number of exercises (size of the one-hot vector).
        epochs:     Number of training epochs.
        batch:      Batch size for both loaders.
        lr:         Adam learning rate.
        ckpt_file:  Destination of the best model (saved with ``torch.save(model, …)``).
        seed:       Seed for the split and for torch's global RNG (weight init,
                    batch order). ``None`` keeps the unseeded behaviour that
                    draws from NumPy's global RNG.

    Raises:
        ValueError: the dataset is too small to yield non-empty train and
            validation splits.
    """
    # Build dataset and split
    ds  = KeypointWindowDataset(Path(csv_file), Path(keypt_root))
    N   = len(ds)
    if seed is None:
        idx = np.arange(N); np.random.shuffle(idx)
    else:
        # Side effect: reseeds torch's global RNG so init and shuffling repeat.
        torch.manual_seed(seed)
        idx = np.random.default_rng(seed).permutation(N)
    c1, c2 = int(0.7*N), int(0.85*N)
    train_idx, val_idx = idx[:c1], idx[c1:c2]   # idx[c2:] is the held-out test set
    if len(train_idx) == 0 or len(val_idx) == 0:
        raise ValueError(
            f"Dataset with {N} windows is too small for a 70/15/15 split"
        )

    train_dl = DataLoader(Subset(ds, train_idx), batch_size=batch, shuffle=True)
    val_dl   = DataLoader(Subset(ds, val_idx),   batch_size=batch, shuffle=False)

    # Infer input dimension
    sample_seq, _, _, _ = ds[0]
    in_dim = sample_seq.shape[-1]

    # Build model
    model    = PoseQualityNetKP(in_dim, num_ex).to(DEVICE)
    loss_cls = nn.CrossEntropyLoss()
    loss_err = nn.SmoothL1Loss()
    opt      = Adam(model.parameters(), lr)

    # Start below any attainable F1 so the first epoch always writes a
    # checkpoint, even if F1 never rises above 0.
    best_f1 = float("-inf")
    for epoch in range(1, epochs+1):
        # -- train --
        model.train()
        tot_loss = 0.0
        for seq, y, err, ex in tqdm(train_dl, desc=f"Epoch {epoch:02d}"):
            seq, y, err, ex = [x.to(DEVICE) for x in (seq, y, err, ex)]
            # Build one-hot encoding for exercise
            ex_1hot = F.one_hot(ex, num_ex).float()

            opt.zero_grad()
            logits, err_hat = model(seq, ex_1hot)
            # Classification loss plus down-weighted error-regression loss
            loss = loss_cls(logits, y) + 0.1 * loss_err(err_hat, err)
            loss.backward()
            opt.step()

            # Weight by batch size so the epoch figure is a per-sample mean
            tot_loss += loss.item() * y.size(0)
        print(f"  ↳ train loss: {tot_loss/len(train_idx):.4f}")

        # -- validation --
        model.eval()
        y_true, y_pred, errs = [], [], []
        with torch.no_grad():
            for seq, y, err, ex in val_dl:
                seq, y, err, ex = [x.to(DEVICE) for x in (seq, y, err, ex)]
                ex_1hot = F.one_hot(ex, num_ex).float()
                logits, err_hat = model(seq, ex_1hot)

                y_true += y.cpu().tolist()
                y_pred += logits.argmax(1).cpu().tolist()
                # Per-sample mean absolute error, kept on CPU
                errs.append((err_hat - err).abs().mean(1).cpu())

        acc = accuracy_score(y_true, y_pred)
        f1  = f1_score(y_true, y_pred)
        mae = torch.cat(errs).mean().item()
        print(f"  ↳ val acc {acc:.3f}, F1 {f1:.3f}, MAE° {mae:.2f}")

        # Save the model with the best F1 score
        if f1 > best_f1:
            best_f1 = f1
            # Pickles the whole module (architecture + weights); loading it
            # requires the model classes to be defined at load time.
            torch.save(model, ckpt_file)
            print(f"  ✓ saved new best model to {ckpt_file}  (F1 {f1:.3f})")


In [7]:
# Launch training: 50 epochs, batch size 16, learning rate 1e-4; all other arguments keep their defaults.
train_epochs(epochs=50, batch=16, lr=1e-4)

Epoch 01:  40%|███▉      | 249/629 [00:05<00:08, 46.17it/s]


KeyboardInterrupt: 