Prepare the LSTM training data using the [Ultralytics pose estimation model](https://github.com/ultralytics/ultralytics) for better robustness.

In [None]:
import os
from glob import glob
from pathlib import Path
from ultralytics.models import YOLO
from ultralytics.engine.results import Results
from tqdm import tqdm
import numpy as np
import pandas as pd
import cv2
import torch

In [None]:
# Run on the first CUDA GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

In [None]:
# Column names for the flattened pose output: an "<keypoint>_x" / "<keypoint>_y"
# pair for each of the 17 body keypoints, in the order listed below.
# NOTE(review): the order is assumed to match the keypoint order emitted by the
# pose model -- confirm against the model documentation.
POSE_OUTPUT_LABELS = [
    f"{keypoint}_{axis}"
    for keypoint in (
        "nose",
        "left_eye",
        "right_eye",
        "left_ear",
        "right_ear",
        "left_shoulder",
        "right_shoulder",
        "left_elbow",
        "right_elbow",
        "left_wrist",
        "right_wrist",
        "left_hip",
        "right_hip",
        "left_knee",
        "right_knee",
        "left_ankle",
        "right_ankle",
    )
    for axis in ("x", "y")
]

In [None]:
# Resolve the project root relative to the current working directory: when the
# notebook is run from inside "training_data_preparation", the root is one level
# up; otherwise the working directory itself is taken as the root.
PROJECT_DIR = (
    Path("..")
    if os.getcwd().endswith("training_data_preparation")
    else Path()
)
# Bare expression so the notebook cell displays the resolved path.
PROJECT_DIR

In [None]:
# Input locations, relative to the project root.
DATASET_DIR = PROJECT_DIR / "Dataset" / "KTH"
POSE_MODEL_DIR = PROJECT_DIR / "models" / "pose"

# Output locations: one sub-directory for raw coordinates and one for
# normalized coordinates.
DATASET_SAVE_DIR = PROJECT_DIR / "TrainingData" / "ultralytics"
DATASET_RAW_SAVE_DIR = DATASET_SAVE_DIR / "raw"
DATASET_NORMALIZED_SAVE_DIR = DATASET_SAVE_DIR / "normalized"

# mkdir(parents=True, exist_ok=True) is already a no-op for an existing
# directory, so no separate (and race-prone) exists() check is needed.
DATASET_RAW_SAVE_DIR.mkdir(parents=True, exist_ok=True)
DATASET_NORMALIZED_SAVE_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Whether to overwrite existing dataset parquet files if they already exist.
# When False, a parquet file that is already present on disk is left untouched.
OVERWRITE_DATASET = False

In [None]:
# Size suffix used in the weights file name; the nano model is accurate enough.
model_size = "n"
# Load the pose weights from POSE_MODEL_DIR, then move the model to the selected device.
pose_model = YOLO(POSE_MODEL_DIR / f"yolo11{model_size}-pose.pt")
pose_model = pose_model.to(device)

In [None]:
def _keypoints_to_dataframe(results: list[Results], coords_attr: str) -> pd.DataFrame:
    """Collect one row of flattened keypoint coordinates per usable frame.

    Args:
        results: Pose model results, one entry per video frame.
        coords_attr: Name of the attribute to read from ``result.keypoints``
            (``"xy"`` or ``"xyn"``).

    Returns:
        A DataFrame with ``POSE_OUTPUT_LABELS`` as columns. Frames without any
        keypoints become an all-zero row; frames whose flattened keypoints do
        not have exactly ``len(POSE_OUTPUT_LABELS)`` values are dropped, so the
        frame may contain fewer rows than ``results`` has entries.
    """
    n_values = len(POSE_OUTPUT_LABELS)
    no_detection = np.zeros(n_values)
    rows = []
    for result in results:
        keypoints = result.keypoints
        if keypoints is None:
            # Defensive: treat a result that carries no keypoints object at all
            # the same way as a frame with no detection.
            rows.append(no_detection)
            continue

        flat = getattr(keypoints, coords_attr).flatten().cpu().numpy()
        if flat.size == 0:
            # Nothing detected in this frame: keep the row as zeros.
            rows.append(no_detection)
        elif flat.size == n_values:
            rows.append(flat)
        # Any other length means multiple persons were detected, but the KTH
        # dataset has only one person per video, so the frame is ignored.
    return pd.DataFrame(rows, columns=POSE_OUTPUT_LABELS)


def save_raw_coordinates(results: list[Results]) -> pd.DataFrame:
    """Process the pose model results' raw x,y coordinates and put them into a pandas dataframe.

    Despite the name, nothing is written to disk here; the caller is
    responsible for saving the returned DataFrame.

    Args:
        results: Pose model results, one entry per video frame.

    Returns:
        A DataFrame with ``POSE_OUTPUT_LABELS`` as columns, built from
        ``result.keypoints.xy``. Frames without a detection are all zeros;
        frames with an unexpected number of values are skipped.
    """
    return _keypoints_to_dataframe(results, "xy")


def save_normalized_coordinates(results: list[Results]) -> pd.DataFrame:
    """Process the pose model results' normalized x,y coordinates and put them into a pandas dataframe.

    Despite the name, nothing is written to disk here; the caller is
    responsible for saving the returned DataFrame.

    Args:
        results: Pose model results, one entry per video frame.

    Returns:
        A DataFrame with ``POSE_OUTPUT_LABELS`` as columns, built from
        ``result.keypoints.xyn``. Frames without a detection are all zeros;
        frames with an unexpected number of values are skipped.
    """
    return _keypoints_to_dataframe(results, "xyn")

Process the videos and save the pose estimation results to Parquet files.

In [None]:
# For every action class, run the pose model over each video and store the
# per-frame keypoints as one raw and one normalized parquet file per video.
for label in ["jogging", "walking", "running"]:
    # Sorted so the processing order is deterministic between runs.
    video_filenames = sorted(glob(str(DATASET_DIR / label) + "/*.avi"))

    for video_filename in tqdm(video_filenames, desc=label):
        # Path.stem strips only the final extension, so file names that
        # contain additional dots are not truncated.
        parquet_name = Path(video_filename).stem
        raw_save_path = DATASET_RAW_SAVE_DIR / f"{parquet_name}.parquet"
        normalized_save_path = DATASET_NORMALIZED_SAVE_DIR / f"{parquet_name}.parquet"

        write_raw = OVERWRITE_DATASET or not raw_save_path.exists()
        write_normalized = OVERWRITE_DATASET or not normalized_save_path.exists()
        if not (write_raw or write_normalized):
            # Both outputs already exist and must not be overwritten, so skip
            # the (expensive) pose inference for this video entirely.
            continue

        results = pose_model.track(
            source=video_filename,
            show=False,
            verbose=False,
            stream=True,
        )
        # The streamed results are iterated twice below, so materialize them.
        results = list(results)
        df = save_raw_coordinates(results)
        normalized_df = save_normalized_coordinates(results)

        if write_raw:
            df.to_parquet(raw_save_path)
        if write_normalized:
            normalized_df.to_parquet(normalized_save_path)