In [None]:
import os

In [2]:
os.chdir("../")

In [3]:
%pwd

'C:\\Users\\Legion\\OneDrive\\Desktop\\Paris-Saclay\\Learning\\AI\\badminton-pose-coach'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    raw_data_path: Path
    processed_data_path: Path
    params_fps: int
    params_conf: int
    params_keypoint_extraction_model: str

In [5]:
from badmintonPoseCoach.constants import *
from badmintonPoseCoach.utils.common import *

In [6]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        config = self.config.data_ingestion
        params = self.params.data_ingestion

        create_directories([config.root_dir])

        data_ingestion_config = DataIngestionConfig(
            raw_data_path = Path(config.raw_data_path),
            processed_data_path = Path(config.processed_data_path),
            params_fps = params.fps,
            params_conf = params.conf,
            params_keypoint_extraction_model = params.keypoint_extraction_model,
        )
        return data_ingestion_config

In [7]:
import os
from badmintonPoseCoach import logger
import os, json, pathlib
from typing import Iterable
import torch
from badmintonPoseCoach.utils.common import get_size, extract_keypoints_from_video

In [8]:
class DataIngestion:
    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def iter_videos(self, root: str) -> Iterable[str]:
        """Yield all video file paths under root recursively."""
        for dirpath, _, filenames in os.walk(root):
            for fn in filenames:
                if fn.lower().endswith("mp4"):
                    yield os.path.join(dirpath, fn)

    def mirror_and_save_json(self):
        """
        Walk through input_root/videos/<class>/*.mp4 and save JSON to
        output_root/<class>/<video_stem>.json, preserving the class folder.
        """
        input_root = os.path.abspath(self.config.raw_data_path)
        output_root = os.path.abspath(self.config.processed_data_path)

        for vp in self.iter_videos(input_root):
            # Compute relative path to preserve class folder
            rel = os.path.relpath(vp, input_root)              # e.g., "forehand/clip01.mp4"
            rel_no_ext = os.path.splitext(rel)[0]              # "forehand/clip01"
            out_json_path = os.path.join(output_root, rel_no_ext + ".json")

            # Ensure parent directory exists
            os.makedirs(os.path.dirname(out_json_path), exist_ok=True)

            # Skip if already extracted
            if os.path.exists(out_json_path):
                print(f"[SKIP] Exists: {out_json_path}")
                continue
            if not is_video_readable(vp):
                logger.info(f"[SKIP] Can't read: {rel}")
                continue

            print(f"[EXTRACT] {vp} -> {out_json_path}")
            data = extract_keypoints_from_video(
                video_path=vp,
                model_or_path=self.config.params_keypoint_extraction_model,
                fps_sample=self.config.params_fps,
                conf=self.config.params_conf,
            )
            data["seq"] = self.clean_data(data["seq"])
            if data["seq"] is None:
                continue

            # Attach label from parent directory name (class folder)
            label = pathlib.Path(rel_no_ext).parts[0]  # first folder under input_root
            data["label"] = label
            data["video_relpath"] = rel.replace("\\", "/")

            with open(out_json_path, "w", encoding="utf-8") as f:
                json.dump(data, f)

    @staticmethod
    def clean_data(seq):
        valid_frames = []
        for i in range(len(seq)):
            if torch.isfinite(torch.tensor(seq[i])).all():
                valid_frames.append(i)

        if len(valid_frames) < int(len(seq)*0.6):
            return None

        return [seq[i] for i in valid_frames]






In [9]:
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(data_ingestion_config)
    data_ingestion.mirror_and_save_json()
except Exception as e:
    raise e

[2025-09-27 18:12:05,618: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-09-27 18:12:05,622: INFO: common: yaml file: params.yaml loaded successfully]
[2025-09-27 18:12:05,624: INFO: common: created directory at: artifacts]
[2025-09-27 18:12:05,626: INFO: common: created directory at: artifacts/data_ingestion]
[EXTRACT] C:\Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI\badminton-pose-coach\data\VideoBadminton_Dataset\VideoBadminton_Dataset\00_Short Serve\2022-08-30_18-00-09_dataset_set1_009_001424_001452_A_00.mp4 -> C:\Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI\badminton-pose-coach\artifacts\data_ingestion\00_Short Serve\2022-08-30_18-00-09_dataset_set1_009_001424_001452_A_00.json
[EXTRACT] C:\Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI\badminton-pose-coach\data\VideoBadminton_Dataset\VideoBadminton_Dataset\00_Short Serve\2022-08-30_18-00-09_dataset_set1_016_001916_001941_A_00.mp4 -> C:\Users\Legion\OneDrive\Desktop\Paris-Saclay\Le

KeyboardInterrupt: 

In [9]:
obj = DataIngestion(None)
obj.mirror_and_save_json(
    input_root="data/VideoBadminton_Dataset/VideoBadminton_Dataset",
    output_root="artifacts/data_ingestion",
    model_or_path="yolo11n-pose.pt",
    fps_sample=15,
    conf=0.25,
)

[EXTRACT] C:\Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI\badminton-pose-coach\data\VideoBadminton_Dataset\VideoBadminton_Dataset\00_Short Serve\2022-08-30_18-00-09_dataset_set1_009_001424_001452_A_00.mp4 -> C:\Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI\badminton-pose-coach\artifacts\data_ingestion\00_Short Serve\2022-08-30_18-00-09_dataset_set1_009_001424_001452_A_00.json
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt to 'yolo11n-pose.pt': 100% ━━━━━━━━━━━━ 6.0MB 886.0KB/s 6.9s9s<0.0ssss0s
[EXTRACT] C:\Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI\badminton-pose-coach\data\VideoBadminton_Dataset\VideoBadminton_Dataset\00_Short Serve\2022-08-30_18-00-09_dataset_set1_016_001916_001941_A_00.mp4 -> C:\Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI\badminton-pose-coach\artifacts\data_ingestion\00_Short Serve\2022-08-30_18-00-09_dataset_set1_016_001916_001941_A_00.json
[EXTRACT] C:\Users\Legion\OneDrive\De

KeyboardInterrupt: 

In [1]:
from ultralytics import YOLO

In [8]:
model = YOLO("yolo11n-pose.pt")

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt to 'yolo11n-pose.pt': 100% ━━━━━━━━━━━━ 6.0MB 814.7KB/s 7.5ss<0.1s9sss8s


In [5]:
print(model)

YOLO(
  (model): PoseModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C3k2(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats

In [1]:
import os
os.chdir("../")
from badmintonPoseCoach.utils.common import get_size, is_json_processed_ok
from pathlib import Path
json_path = Path(f"artifacts/data_ingestion/00_Short Serve/2022-08-30_18-00-09_dataset_set1_009_001424_001452_A_00.json")
is_json_processed_ok(json_path, 5)

True

In [2]:
import os
from badmintonPoseCoach.utils.common import get_size, is_json_processed_ok, extract_keypoints_from_video
os.path.exists(json_path)

True

In [3]:
data = extract_keypoints_from_video("C:/Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI/badminton-pose-coach\data\VideoBadminton_Dataset\VideoBadminton_Dataset/00_Short Serve/2022-08-30_19-35-00_dataset_set1_134_009681_009702_A_00.mp4", "yolo11n-pose.pt", fps_sample=15, conf=0.1)
data

  data = extract_keypoints_from_video("C:/Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI/badminton-pose-coach\data\VideoBadminton_Dataset\VideoBadminton_Dataset/00_Short Serve/2022-08-30_19-35-00_dataset_set1_134_009681_009702_A_00.mp4", "yolo11n-pose.pt", fps_sample=15, conf=0.1)


[ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: ultralytics.engine.results.Keypoints object
masks: None
names: {0: 'person'}
obb: None
orig_img: array([[[124,  53,  52],
        [124,  53,  52],
        [124,  53,  52],
        ...,
        [102,  40,  34],
        [102,  40,  34],
        [102,  40,  34]],

       [[124,  53,  52],
        [124,  53,  52],
        [124,  53,  52],
        ...,
        [102,  40,  34],
        [102,  40,  34],
        [102,  40,  34]],

       [[124,  53,  52],
        [124,  53,  52],
        [124,  53,  52],
        ...,
        [102,  40,  34],
        [102,  40,  34],
        [102,  40,  34]],

       ...,

       [[ 28,  42,  27],
        [ 28,  42,  27],
        [ 28,  42,  27],
        ...,
        [ 27,  43,  30],
        [ 27,  43,  30],
        [ 27,  43,  30]],

       [[ 28,  42,  27],
        [ 28,  42,  27],
        [ 28,  42,  27],
        ...,
        [ 27,  43,  30]

{'fps_sample': 15,
 'seq': [[[nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan],
   [nan, nan, nan]],
  [[535.4442138671875, 375.1937561035156, 0.5455293655395508],
   [536.7182006835938, 371.656982421875, 0.522096574306488],
   [534.4854125976562, 372.01007080078125, 0.3251242935657501],
   [540.1756591796875, 370.9814758300781, 0.5569188594818115],
   [534.385986328125, 371.8968200683594, 0.2228112369775772],
   [549.117919921875, 383.39056396484375, 0.9636653065681458],
   [532.2842407226562, 384.7289123535156, 0.9265967011451721],
   [563.3696899414062, 401.95880126953125, 0.8933402895927429],
   [526.6290283203125, 404.1922607421875, 0.6929339170455933],
   [564.9537963867188, 421.9171447753906, 0.8277733325958252],
   [

In [18]:
import torch
seq = data["seq"]
for i in range(len(seq)):
    if torch.isfinite(torch.tensor(seq[i])).all() == False:
        print(i)

0


In [30]:


data = extract_keypoints_from_video("C:/Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI/badminton-pose-coach\data\VideoBadminton_Dataset\VideoBadminton_Dataset/00_Short Serve/2022-08-30_19-35-00_dataset_set1_134_009681_009702_A_00.mp4", "yolo11n-pose.pt", fps_sample=15, conf=0.25)

print(clean_data(torch.tensor(data["seq"])))

  data = extract_keypoints_from_video("C:/Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI/badminton-pose-coach\data\VideoBadminton_Dataset\VideoBadminton_Dataset/00_Short Serve/2022-08-30_19-35-00_dataset_set1_134_009681_009702_A_00.mp4", "yolo11n-pose.pt", fps_sample=15, conf=0.25)


None


In [23]:
data = extract_keypoints_from_video("C:/Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI/badminton-pose-coach\data\VideoBadminton_Dataset\VideoBadminton_Dataset/00_Short Serve/2022-08-30_19-35-00_dataset_set1_134_009681_009702_A_00.mp4", "yolo11n-pose.pt", fps_sample=15, conf=0.1)

print(clean_data(torch.tensor(data["seq"])))

  data = extract_keypoints_from_video("C:/Users\Legion\OneDrive\Desktop\Paris-Saclay\Learning\AI/badminton-pose-coach\data\VideoBadminton_Dataset\VideoBadminton_Dataset/00_Short Serve/2022-08-30_19-35-00_dataset_set1_134_009681_009702_A_00.mp4", "yolo11n-pose.pt", fps_sample=15, conf=0.1)


None
