In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
sapiens_raw_data.py

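📌 Full pipeline (as designed):
1. Scan RAW_DATA_ROOT for mp4/mov video files
2. Compare against the CSV (video_metadata.csv) → add files that are missing
   - exists=0 when the file at video_path is not actually present
   - New files found under RAW_DATA_ROOT are appended to the CSV and saved immediately
   - NOTE: the code in this file only refreshes the `exists` column of rows
     already in the CSV; the scan for new files (steps 1-2) is not implemented here.
3. When frames_verified == 0:
   - Delete the whole existing frame folder
   - Re-extract frames from the video, downsampled to 720p
   - After extraction, set frames_verified=1 and update n_extracted_frames
   - Save the CSV once frame extraction finishes
4. When frames_verified == 1: skip frame extraction
5. Run the Sapiens model and save keypoints_json
   - Save the CSV immediately after each video (row) completes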
"""

import os, cv2, json, subprocess, shutil
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed

# ---------------- 외부 라이브러리 ----------------
from mmdet.apis import init_detector, inference_detector
from mmpose.apis import init_model as init_pose_estimator, inference_topdown
from mmpose.utils import adapt_mmdet_pipeline
from mmpose.evaluation.functional import nms
from mmpose.structures import merge_data_samples, split_instances
import mmpretrain  # VisionTransformer 등록

# ---------------- Path settings ----------------
# Root directory of the raw input videos (not referenced by the code visible
# in this file).
RAW_DATA_ROOT = Path("../data/new_data/raw_data")
# Metadata CSV that tracks one row per video and its processing state.
CSV_PATH = Path("../data/new_data/video_metadata.csv")
# Output roots for extracted frames and keypoint JSON files (not referenced by
# the code visible in this file; per-video directories are read from the CSV).
OUTPUT_ROOTS = {
    "frames_dir": Path("../data/new_data/frames_output"),
    "keypoints_dir": Path("../data/new_data/keypoints_json"),
}
# Video file extensions of interest (not referenced by the code visible in
# this file).
VIDEO_EXTS = [".mp4", ".MP4", ".mov", ".MOV"]

# ---------------- Frame extraction options ----------------
TARGET_SHORT = 720   # target length in pixels of the shorter image side
JPEG_QUALITY = 80    # value passed as cv2.IMWRITE_JPEG_QUALITY
MAX_WORKERS = 4      # number of worker processes used for frame extraction

# ---------------- Sapiens model settings ----------------
# Config / checkpoint pairs for the person detector and the pose estimator.
DET_CONFIG  = "../sapiens/pose/demo/mmdetection_cfg/rtmdet_m_640-8xb32_coco-person_no_nms.py"
DET_CKPT    = "../sapiens/pose/checkpoints/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth"
POSE_CONFIG = "../sapiens/pose/configs/sapiens_pose/coco/sapiens_0.3b-210e_coco-1024x768.py"
POSE_CKPT   = "../sapiens/pose/checkpoints/sapiens_0.3b/sapiens_0.3b_coco_best_coco_AP_796.pth"

# ---------------- 유틸 함수 ----------------
def to_py(obj):
    """Recursively convert NumPy objects into JSON-serializable Python types.

    Args:
        obj: Any value. Arrays, NumPy scalars, dicts, lists and tuples are
            converted; everything else is returned unchanged.

    Returns:
        The converted value: arrays become nested lists, NumPy bool / float /
        integer scalars become ``bool`` / ``float`` / ``int``, dict values are
        converted recursively (keys are kept as-is), and lists / tuples become
        lists of converted items.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # np.bool_ is neither np.integer nor np.floating, so it needs its own
    # branch; otherwise json.dump raises "Object of type bool_ is not JSON
    # serializable".
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, dict):
        return {k: to_py(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [to_py(v) for v in obj]
    return obj

# ---------------- Frame 추출 ----------------
def extract_frames(video_path, frame_dir, target_short=720, jpeg_quality=80):
    """Re-extract all frames of a video as resized JPEG files.

    Any existing ``frame_dir`` is removed and recreated first, so the folder
    only ever contains frames from this run. Frames are scaled so that the
    shorter image side equals ``target_short`` and written as
    ``000000.jpg``, ``000001.jpg``, ... in read order.

    Args:
        video_path: Path of the input video.
        frame_dir: Directory to (re)create and fill with JPEG frames.
        target_short: Target length in pixels of the shorter image side.
        jpeg_quality: Value passed as ``cv2.IMWRITE_JPEG_QUALITY``.

    Returns:
        tuple[str, int]: A status message and the number of frames that were
        successfully written (0 when the video could not be read).
    """
    video_path, frame_dir = Path(video_path), Path(frame_dir)
    if frame_dir.exists():
        shutil.rmtree(frame_dir)  # start from an empty folder
    frame_dir.mkdir(parents=True, exist_ok=True)

    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            return f"[SKIP] 열기 실패: {video_path}", 0

        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if w <= 0 or h <= 0:
            # Guard against a ZeroDivisionError when the backend cannot
            # report the frame size.
            return f"[SKIP] 해상도 확인 실패: {video_path}", 0

        # Scale so that the shorter side becomes target_short.
        scale = target_short / min(w, h)
        new_w, new_h = int(round(w * scale)), int(round(h * scale))

        extracted_count = 0
        idx = 0
        # Read until the stream ends instead of trusting CAP_PROP_FRAME_COUNT,
        # which is container metadata and may be 0 or inaccurate.
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            resized = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
            out_path = frame_dir / f"{idx:06d}.jpg"
            if cv2.imwrite(str(out_path), resized, [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_quality]):
                extracted_count += 1
            idx += 1
    finally:
        # Release the capture handle on every exit path, including errors.
        cap.release()

    return f"[DONE] {video_path.name} {w}x{h} → {new_w}x{new_h}, 총 {extracted_count} 프레임", extracted_count

# ---------------- CSV 업데이트 ----------------
def update_metadata_csv():
    """Load (or create) the metadata CSV and refresh its ``exists`` column.

    Only the existence flag is recomputed; no other metadata is updated and
    no new rows are added. The result is written back to ``CSV_PATH``.

    Returns:
        pandas.DataFrame: The metadata table with ``exists`` set to 1 for rows
        whose ``video_path`` points to an existing file and 0 otherwise.
    """
    if CSV_PATH.exists():
        df = pd.read_csv(CSV_PATH, encoding="utf-8-sig")
    else:
        df = pd.DataFrame(columns=[
            "file_name","video_path","subdir",
            "width","height","num_frames","fps","duration_sec","codec",
            "frames_dir","n_extracted_frames","keypoints_dir",
            "frames_verified","sapiens_done","exists"
        ])

    # Existence check only. A missing path (NaN after read_csv) is not a str
    # and would make Path() raise, so it is treated as "does not exist".
    df["exists"] = [
        int(isinstance(p, str) and Path(p).exists()) for p in df["video_path"]
    ]

    # Make sure the target directory exists so the first run can create the CSV.
    CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(CSV_PATH, index=False, encoding="utf-8-sig")
    print(f"[INFO] CSV 존재 여부 갱신 완료: {CSV_PATH}")
    return df

# ---------------- Sapiens 실행 ----------------
def run_sapiens_on_frames(video_row, detector, pose_estimator, df, idx):
    """Run person detection + pose estimation on one video's extracted frames.

    For every frame with at least one detected person, a JSON file named
    ``<frame index>.json`` is written under
    ``<keypoints_dir>/<video stem>_JSON``. When all frames were processed
    without raising, ``sapiens_done`` is set to 1 for this row and the CSV is
    saved immediately.

    Args:
        video_row: Metadata row of the video; reads ``file_name``,
            ``sapiens_done``, ``frames_dir`` and ``keypoints_dir``.
        detector: Detector model passed to ``inference_detector``.
        pose_estimator: Pose model passed to ``inference_topdown``.
        df: Full metadata table; updated in place and written to ``CSV_PATH``.
        idx: Index label of ``video_row`` inside ``df``.

    Returns:
        None. The function returns early without touching the CSV when the
        video is already done, its frame folder is missing or empty, or any
        frame raised during inference / writing.
    """
    video_filename = video_row["file_name"]
    if video_row["sapiens_done"] == 1:
        return
    frame_dir = Path(video_row["frames_dir"])
    if not frame_dir.exists():
        return

    frames = sorted(frame_dir.glob("*.jpg"))
    if not frames:
        # Nothing to process: do not mark the video as done.
        print(f"[WARN] 프레임 없음: {frame_dir}")
        return

    json_dir = Path(video_row["keypoints_dir"])/f"{Path(video_filename).stem}_JSON"
    json_dir.mkdir(parents=True, exist_ok=True)

    # The dataset meta is identical for every frame; convert it once.
    meta_info = to_py(pose_estimator.dataset_meta)
    n_failed = 0

    for idx_frame, fpath in enumerate(tqdm(frames, desc=video_filename, unit="frame")):
        img_bgr = cv2.imread(str(fpath))
        if img_bgr is None:
            continue
        try:
            # Pass the cv2 BGR array directly. NOTE(review): the OpenMMLab
            # inference APIs follow the BGR convention for loaded images and
            # leave channel reordering to each model's data_preprocessor, so
            # a prior BGR->RGB conversion swaps channels; confirm against the
            # detector / pose configs in use.
            det = inference_detector(detector, img_bgr)
            pred = det.pred_instances.cpu().numpy()
            # Keep class 0 detections with score > 0.5, then apply NMS.
            keep = (pred.labels==0) & (pred.scores>0.5)
            bbs = np.concatenate((pred.bboxes, pred.scores[:,None]), axis=1)[keep]
            if len(bbs)==0:
                continue
            bbs = bbs[nms(bbs,0.5),:4]

            pose_results = inference_topdown(pose_estimator,img_bgr,bbs)
            data_sample = merge_data_samples(pose_results)
            inst = data_sample.get("pred_instances",None)
            if inst is None:
                continue
            inst_list = split_instances(inst)

            payload = dict(frame_index=idx_frame,video_name=video_filename,
                           meta_info=meta_info,instance_info=to_py(inst_list))
            json_path = json_dir/f"{idx_frame:06d}.json"
            with open(json_path,"w",encoding="utf-8") as f:
                json.dump(payload,f,ensure_ascii=False,indent=2)
        except Exception as e:
            # Best effort per frame: report, count, and continue.
            n_failed += 1
            print(f"[ERROR] {video_filename} frame {idx_frame} → {e}")

    if n_failed:
        # Leave sapiens_done unset so the video is retried on the next run
        # instead of being silently recorded as complete.
        print(f"[WARN] {video_filename}: {n_failed} 프레임 실패 → sapiens_done 미설정")
        return

    # Persist completion for this row right away.
    df.loc[idx,"sapiens_done"] = 1
    df.to_csv(CSV_PATH,index=False,encoding="utf-8-sig")
    print(f"[INFO] CSV 업데이트 (sapiens_done=1): {video_filename}")

# ---------------- 메인 ----------------
def main():
    """Run the pipeline: refresh the CSV, extract frames, run Sapiens.

    1. Refresh the ``exists`` column of the metadata CSV.
    2. For rows with ``exists == 1`` and ``frames_verified == 0``, extract
       frames in worker processes; the CSV is saved after each finished video.
    3. For rows with ``exists == 1``, ``frames_verified == 1`` and
       ``sapiens_done == 0``, run detection + pose estimation.
    """
    # 1) Refresh the CSV (existence flags only)
    df = update_metadata_csv()

    # 2) Frame extraction
    tasks = {}
    with ProcessPoolExecutor(max_workers=MAX_WORKERS) as executor:
        for idx, row in df.iterrows():
            if row["exists"] == 1 and row["frames_verified"] == 0:
                tasks[executor.submit(
                    extract_frames, row["video_path"], row["frames_dir"], TARGET_SHORT, JPEG_QUALITY
                )] = idx

        for future in tqdm(as_completed(tasks), total=len(tasks), desc="Frame Extraction"):
            idx = tasks[future]
            try:
                msg, n_extracted = future.result()
            except Exception as e:
                # One failing video must not abort the remaining extractions.
                print(f"[ERROR] {df.loc[idx, 'video_path']} → {e}")
                continue
            print(msg)
            df.loc[idx,"n_extracted_frames"] = n_extracted
            # Only mark the row verified when frames were actually written;
            # a failed open returns 0 and should be retried on the next run.
            if n_extracted > 0:
                df.loc[idx,"frames_verified"] = 1
            df.to_csv(CSV_PATH, index=False, encoding="utf-8-sig")

    print(f"[INFO] CSV 업데이트 완료 (프레임 추출 후): {CSV_PATH}")

    # 3) Sapiens inference
    pending = [
        (idx, row) for idx, row in df.iterrows()
        if row["exists"] == 1 and row["frames_verified"] == 1 and row["sapiens_done"] == 0
    ]
    if not pending:
        # Skip loading both models when there is nothing to process.
        print("[INFO] Sapiens 처리 대상 없음")
        return

    detector = init_detector(DET_CONFIG,DET_CKPT,device="cuda:0")
    detector.cfg = adapt_mmdet_pipeline(detector.cfg)
    pose_estimator = init_pose_estimator(
        POSE_CONFIG,POSE_CKPT,device="cuda:0",
        cfg_options=dict(model=dict(test_cfg=dict(output_heatmaps=False)))
    )
    for idx, row in pending:
        run_sapiens_on_frames(row, detector, pose_estimator, df, idx)

# ---------------- Entry point ----------------
# Run the pipeline only when executed as a script, not on import (this guard
# also keeps ProcessPoolExecutor workers from re-running main on import).
if __name__=="__main__":
    main()


  from pkg_resources import DistributionNotFound, get_distribution


[INFO] CSV 존재 여부 갱신 완료: ../data/new_data/video_metadata.csv


Frame Extraction:   2% 1/55 [00:15<13:32, 15.04s/it]

[DONE] Hip Flexion Extension3.MP4 1920x1080 → 1280x720, 총 332 프레임


Frame Extraction:   4% 2/55 [00:15<05:41,  6.44s/it]

[DONE] Hip Flexion Extension_bobathtable3.MP4 1920x1080 → 1280x720, 총 346 프레임


Frame Extraction:   5% 3/55 [00:23<06:13,  7.18s/it]

[DONE] Hip Flexion Extension2.MOV 3840x2160 → 1280x720, 총 378 프레임


Frame Extraction:   9% 5/55 [00:30<03:49,  4.59s/it]

[DONE] Hip Flexion Extension_bobathtable1.MOV 1920x1080 → 1280x720, 총 647 프레임
[DONE] Hip Flexion Extension_chair2.MOV 1920x1080 → 1280x720, 총 312 프레임


Frame Extraction:  11% 6/55 [00:30<02:34,  3.15s/it]

[DONE] Hip Flexion Extension_chair3.MP4 1920x1080 → 1280x720, 총 347 프레임


Frame Extraction:  13% 7/55 [00:38<03:45,  4.70s/it]

[DONE] Hip Flexion Extension_Wheelchair2.MOV 1920x1080 → 1280x720, 총 321 프레임


Frame Extraction:  15% 8/55 [00:44<03:54,  4.99s/it]

[DONE] Knee Extension_Chair2.MOV 1920x1080 → 1280x720, 총 292 프레임


Frame Extraction:  16% 9/55 [00:46<03:02,  3.97s/it]

[DONE] Knee Extension_chair3.MP4 1920x1080 → 1280x720, 총 334 프레임


Frame Extraction:  18% 10/55 [00:47<02:14,  3.00s/it]

[DONE] Knee Extension_Wheelchair2.MOV 1920x1080 → 1280x720, 총 313 프레임


Frame Extraction:  20% 11/55 [00:57<03:47,  5.18s/it]

[DONE] Knee Flexion2.MOV 3840x2160 → 1280x720, 총 285 프레임


Frame Extraction:  22% 12/55 [01:19<07:24, 10.33s/it]

[DONE] Biceps Curl_1.MOV 2160x3840 → 720x1280, 총 351 프레임


Frame Extraction:  24% 13/55 [01:25<06:23,  9.13s/it]

[DONE] Ankle Pumping1.MOV 2160x3840 → 720x1280, 총 426 프레임


Frame Extraction:  25% 14/55 [01:27<04:45,  6.97s/it]

[DONE] Biceps Curl_Bilateral1.MOV 2160x3840 → 720x1280, 총 354 프레임


Frame Extraction:  27% 15/55 [01:33<04:21,  6.53s/it]

[DONE] SLR_bobathtable1.MOV 1920x1080 → 1280x720, 총 1039 프레임


Frame Extraction:  29% 16/55 [01:43<05:04,  7.81s/it]

[DONE] Bridge_Dynamic2.MP4 1920x1080 → 1280x720, 총 420 프레임


Frame Extraction:  31% 17/55 [01:44<03:30,  5.53s/it]

[DONE] Bridge_Dynamic3.MP4 1920x1080 → 1280x720, 총 376 프레임


Frame Extraction:  33% 18/55 [01:55<04:29,  7.27s/it]

[DONE] Bridge_Dynamic1.MOV 2160x3840 → 720x1280, 총 417 프레임


Frame Extraction:  35% 19/55 [02:00<03:56,  6.56s/it]

[DONE] Chest Press2.MP4 1920x1080 → 1280x720, 총 359 프레임


Frame Extraction:  36% 20/55 [02:16<05:31,  9.46s/it]

[DONE] Chest Press1.MOV 2160x3840 → 720x1280, 총 390 프레임


Frame Extraction:  38% 21/55 [02:17<03:51,  6.80s/it]

[DONE] Clamshell_Sidelying2.MP4 1920x1080 → 1280x720, 총 372 프레임


Frame Extraction:  40% 22/55 [02:29<04:35,  8.35s/it]

[DONE] Clamshell_Sidelying1.MOV 2160x3840 → 720x1280, 총 397 프레임


Frame Extraction:  42% 23/55 [02:34<03:54,  7.32s/it]

[DONE] Bridge_static1.MOV 2160x3840 → 720x1280, 총 658 프레임


Frame Extraction:  44% 24/55 [02:34<02:44,  5.31s/it]

[DONE] Clamshell_Sidelying3.MP4 1920x1080 → 1280x720, 총 402 프레임


Frame Extraction:  45% 25/55 [02:47<03:47,  7.58s/it]

[DONE] Clamshell_Supine2.MP4 1920x1080 → 1280x720, 총 408 프레임


Frame Extraction:  47% 26/55 [02:49<02:52,  5.94s/it]

[DONE] Clamshell_Supine3.MP4 1920x1080 → 1280x720, 총 373 프레임


Frame Extraction:  49% 27/55 [02:50<02:02,  4.37s/it]

[DONE] Hip abduction3.MP4 1920x1080 → 1280x720, 총 361 프레임


Frame Extraction:  51% 28/55 [02:53<01:49,  4.06s/it]

[DONE] Clamshell_Supine1.MOV 2160x3840 → 720x1280, 총 411 프레임


Frame Extraction:  53% 29/55 [03:06<02:55,  6.75s/it]

[DONE] Hip knee flexion2.MP4 1920x1080 → 1280x720, 총 383 프레임


Frame Extraction:  55% 30/55 [03:08<02:12,  5.29s/it]

[DONE] Hip Extension1.MOV 2800x2160 → 933x720, 총 339 프레임


Frame Extraction:  56% 31/55 [03:10<01:42,  4.26s/it]

[DONE] Hip knee flexion3.MP4 1920x1080 → 1280x720, 총 371 프레임


Frame Extraction:  58% 32/55 [03:22<02:32,  6.61s/it]

[DONE] Knee flexion1.MOV 2658x2160 → 886x720, 총 331 프레임


Frame Extraction:  60% 33/55 [03:25<02:02,  5.56s/it]

[DONE] Knee flexion2.MP4 1920x1080 → 1280x720, 총 362 프레임


Frame Extraction:  62% 34/55 [03:26<01:29,  4.27s/it]

[DONE] Knee flexion3.MP4 1920x1080 → 1280x720, 총 349 프레임


Frame Extraction:  64% 35/55 [03:32<01:32,  4.65s/it]

[DONE] Hip knee flexion1.MOV 2160x3840 → 720x1280, 총 466 프레임


Frame Extraction:  65% 36/55 [03:40<01:46,  5.60s/it]

[DONE] Leg cycle exercise2.MP4 1920x1080 → 1280x720, 총 338 프레임


Frame Extraction:  67% 37/55 [03:42<01:22,  4.61s/it]

[DONE] Leg cycle exercise3.MP4 1920x1080 → 1280x720, 총 345 프레임


Frame Extraction:  69% 38/55 [03:55<02:01,  7.16s/it]

[DONE] Leg cycle exercise1.MOV 2160x3840 → 720x1280, 총 371 프레임


Frame Extraction:  71% 39/55 [04:05<02:09,  8.08s/it]

[DONE] Lumbar rotation1.MOV 2160x3840 → 720x1280, 총 398 프레임


Frame Extraction:  73% 40/55 [04:22<02:38, 10.56s/it]

[DONE] Overhead Triceps Extension2.MP4 1920x1080 → 1280x720, 총 361 프레임


Frame Extraction:  75% 41/55 [04:29<02:14,  9.58s/it]

[DONE] Overhead Triceps Extension1.MOV 2160x3840 → 720x1280, 총 397 프레임


Frame Extraction:  76% 42/55 [04:37<01:57,  9.06s/it]

[DONE] Overhead Triceps Extension3.MP4 1920x1080 → 1280x720, 총 346 프레임


Frame Extraction:  78% 43/55 [04:38<01:20,  6.67s/it]

[DONE] One leg brige_dynamic1.MOV 2160x3840 → 720x1280, 총 667 프레임


Frame Extraction:  80% 44/55 [04:47<01:21,  7.42s/it]

[DONE] One leg bridge_static1.MOV 2160x3840 → 720x1280, 총 771 프레임


Frame Extraction:  82% 45/55 [04:52<01:07,  6.73s/it]

[DONE] Q-setting3.MP4 1920x1080 → 1280x720, 총 348 프레임


Frame Extraction:  84% 46/55 [05:09<01:26,  9.60s/it]

[DONE] PecdecFly1.MOV 2160x3840 → 720x1280, 총 460 프레임


Frame Extraction:  85% 47/55 [05:12<01:00,  7.60s/it]

[DONE] Shoulder Flexion2.MP4 1920x1080 → 1280x720, 총 420 프레임


Frame Extraction:  87% 48/55 [05:27<01:10, 10.03s/it]

[DONE] Shoulder Flexion1.MOV 2160x3840 → 720x1280, 총 453 프레임


Frame Extraction:  89% 49/55 [05:28<00:43,  7.25s/it]

[DONE] Shoulder Abduction1.MOV 2160x3840 → 720x1280, 총 593 프레임


Frame Extraction:  91% 50/55 [05:29<00:27,  5.44s/it]

[DONE] Shoulder Flexion3.MP4 1920x1080 → 1280x720, 총 410 프레임


Frame Extraction:  93% 51/55 [05:31<00:17,  4.46s/it]

[DONE] SLR3.MP4 1920x1080 → 1280x720, 총 417 프레임


Frame Extraction:  95% 52/55 [05:45<00:21,  7.11s/it]

[DONE] Hip Flexion Extension_Wheelchair3.MP4 1920x1080 → 1280x720, 총 349 프레임
[DONE] SLR_bobathtable1.MP4 1920x1080 → 1280x720, 총 347 프레임


Frame Extraction:  98% 54/55 [05:48<00:04,  4.38s/it]

[DONE] Pec dec Fly2.MP4 1920x1080 → 1280x720, 총 372 프레임


Frame Extraction: 100% 55/55 [05:50<00:00,  6.37s/it]

[DONE] Pec dec Fly3.MP4 1920x1080 → 1280x720, 총 370 프레임
[INFO] CSV 업데이트 완료 (프레임 추출 후): ../data/new_data/video_metadata.csv



  _bootstrap._exec(spec, module)


Loads checkpoint by local backend from path: ../sapiens/pose/checkpoints/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth
Loads checkpoint by local backend from path: ../sapiens/pose/checkpoints/sapiens_0.3b/sapiens_0.3b_coco_best_coco_AP_796.pth
The model and loaded state dict do not match exactly

missing keys in source state_dict: head.deconv_layers.1.weight, head.deconv_layers.1.bias, head.deconv_layers.1.running_mean, head.deconv_layers.1.running_var, head.deconv_layers.4.weight, head.deconv_layers.4.bias, head.deconv_layers.4.running_mean, head.deconv_layers.4.running_var, head.conv_layers.1.weight, head.conv_layers.1.bias, head.conv_layers.1.running_mean, head.conv_layers.1.running_var, head.conv_layers.4.weight, head.conv_layers.4.bias, head.conv_layers.4.running_mean, head.conv_layers.4.running_var



  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Biceps Curl2.MOV:   8% 23/288 [00:12<02:29,  1.78frame/s]


KeyboardInterrupt: 