In [5]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pandas as pd
from pathlib import Path

# Relative path to the video metadata CSV that eda_mismatched_frames() reads.
# Being relative, it is resolved against the current working directory.
CSV_PATH = Path("../data/new_data/video_metadata.csv")

def _format_frames(value, signed=False):
    """Render a frame count (or a signed frame difference) as text.

    Missing values become "N/A", and integral floats are printed without a
    trailing ".0". pandas upcasts an integer column to float as soon as it
    contains a NaN, so without this the counts would print as "303.0" and the
    integer ``d`` format code would raise ``ValueError``.

    Args:
        value: A scalar taken from a numeric column (int, float or NaN).
        signed: When True, always emit the sign ("+34" / "-10").

    Returns:
        str: The unpadded text; the caller applies width and alignment.
    """
    if pd.isna(value):
        return "N/A"
    number = int(value) if float(value).is_integer() else value
    return f"{number:+}" if signed else f"{number}"


def eda_mismatched_frames(csv_path=None):
    """Print an EDA report of videos whose frame counts disagree.

    Reads the metadata CSV, computes ``n_extracted_frames - num_frames`` for
    every row, and prints the number of mismatching rows, summary statistics of
    the difference, and one line per mismatching video ordered by difference.

    Rows where either count is missing are reported as mismatches as well
    (``NaN != 0`` is True) and are shown with "N/A" instead of crashing the
    report.

    Args:
        csv_path: Path of the metadata CSV. Defaults to the module-level
            ``CSV_PATH`` when omitted, which keeps the no-argument call working.

    Returns:
        pandas.DataFrame: The mismatching rows, sorted by ``frame_diff``
        (rows with a missing difference come last), including the added
        ``frame_diff`` column.

    Raises:
        FileNotFoundError: If the CSV does not exist.
        KeyError: If ``num_frames`` or ``n_extracted_frames`` is not a column.
    """
    # Resolve the default lazily so the constant is only needed when no path is given.
    source = CSV_PATH if csv_path is None else csv_path

    # Load the CSV (utf-8-sig strips a BOM if one is present)
    df = pd.read_csv(source, encoding="utf-8-sig")

    # Compute the frame difference
    df["frame_diff"] = df["n_extracted_frames"] - df["num_frames"]

    # Sorted once up front so the listing below is easy to scan;
    # the count and describe() output do not depend on row order.
    mismatches = df[df["frame_diff"] != 0].sort_values(by="frame_diff")

    print("[INFO] CSV 로드 완료")
    print(f"총 행 개수: {len(df)}")
    print(f"프레임 불일치 개수: {len(mismatches)}")
    print()

    # Difference statistics
    print("[EDA] 프레임 차이 통계")
    print(mismatches["frame_diff"].describe())
    print()

    # Print every mismatch. Iterating the columns directly (instead of
    # iterrows) keeps each column's own dtype for formatting.
    print("[불일치 전체 샘플]")
    for name, n_meta, n_extracted, diff in zip(
        mismatches["file_name"],
        mismatches["num_frames"],
        mismatches["n_extracted_frames"],
        mismatches["frame_diff"],
    ):
        print(f"🎥 {name:<35} "
              f"num_frames={_format_frames(n_meta):<6} "
              f"n_extracted={_format_frames(n_extracted):<6} "
              f"diff={_format_frames(diff, signed=True):>5}")

    return mismatches

# Run the frame-mismatch report when this code executes as the main module.
if __name__ == "__main__":
    eda_mismatched_frames()


[INFO] sapiens_done 전체를 0으로 초기화 완료
sapiens_done
0    1019
Name: count, dtype: int64


In [4]:
import pandas as pd
import cv2
from pathlib import Path

# Relative path (a plain string) to the video metadata CSV that recount_frames() reads.
# Being relative, it is resolved against the current working directory.
CSV_PATH = "../data/new_data/video_metadata.csv"

def _count_decodable_frames(video_path):
    """Count the frames OpenCV can decode by reading the video sequentially.

    Args:
        video_path: Path of the video file.

    Returns:
        int | None: Number of frames successfully read, or None when OpenCV
        could not open the file at all (so "unreadable" is distinguishable
        from a video that genuinely yields zero frames).
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            return None
        total_frames = 0
        while True:
            ret, _ = cap.read()
            if not ret:
                break
            total_frames += 1
        return total_frames
    finally:
        # Always release the capture handle, even if read() raises.
        cap.release()


def recount_frames(video_list, csv_path=None):
    """Recount frames of selected videos with OpenCV and compare against the CSV.

    For each requested file name, looks up its row in the metadata CSV, decodes
    the video at its ``video_path`` frame by frame, and reports the recounted
    total next to the CSV's ``num_frames`` and ``n_extracted_frames``.
    Videos that are not listed in the CSV, whose file is missing, or that
    OpenCV cannot open are skipped with a ``[WARN]`` line.

    Args:
        video_list: Iterable of ``file_name`` values to check.
        csv_path: Path of the metadata CSV. Defaults to the module-level
            ``CSV_PATH`` when omitted, which keeps existing calls working.

    Returns:
        list[dict]: One dict per successfully recounted video with the keys
        ``file_name``, ``csv_num_frames``, ``csv_n_extracted``,
        ``recounted_frames``, ``diff_csv_vs_recount`` and
        ``diff_extracted_vs_recount`` (both diffs are recount minus CSV value).
    """
    # Resolve the default lazily so the constant is only needed when no path is given.
    source = CSV_PATH if csv_path is None else csv_path
    df = pd.read_csv(source, encoding="utf-8-sig")

    results = []

    for fname in video_list:
        matches = df[df["file_name"] == fname]
        if matches.empty:
            print(f"[WARN] {fname} → CSV에 없음")
            continue

        # If the name appears more than once, the first row is used.
        record = matches.iloc[0]

        # The path is used exactly as stored (relative paths resolve against the
        # current working directory). An empty CSV cell is read as NaN, which
        # Path() would reject with TypeError, so treat it as a missing file.
        raw_path = record["video_path"]
        video_path = None if pd.isna(raw_path) else Path(raw_path)
        if video_path is None or not video_path.exists():
            print(f"[WARN] {fname} → 파일 없음 ({video_path})")
            continue

        # Recount the frames with OpenCV
        total_frames = _count_decodable_frames(video_path)
        if total_frames is None:
            # Without this check an unreadable file would be reported as 0 frames.
            print(f"[WARN] {fname} → 영상을 열 수 없음 ({video_path})")
            continue

        csv_num_frames = int(record["num_frames"])
        csv_n_extracted = int(record["n_extracted_frames"])

        results.append({
            "file_name": fname,
            "csv_num_frames": csv_num_frames,
            "csv_n_extracted": csv_n_extracted,
            "recounted_frames": total_frames,
            "diff_csv_vs_recount": total_frames - csv_num_frames,
            "diff_extracted_vs_recount": total_frames - csv_n_extracted,
        })

    # Human-readable report
    for r in results:
        print("="*80)
        print(f"🎥 {r['file_name']}")
        print(f"  CSV num_frames        : {r['csv_num_frames']}")
        print(f"  CSV n_extracted_frames: {r['csv_n_extracted']}")
        print(f"  OpenCV recount_frames : {r['recounted_frames']}")
        print(f"  ➡️ diff (recount - csv_num_frames)        = {r['diff_csv_vs_recount']}")
        print(f"  ➡️ diff (recount - csv_n_extracted_frames) = {r['diff_extracted_vs_recount']}")

    return results

# Recount the listed videos when this code executes as the main module.
if __name__ == "__main__":
    # Only list the videos suspected of having frame-count problems
    recount_frames([
        "Biceps Curl2.MOV",
        "Hip Extension2.MOV"
    ])


🎥 Biceps Curl2.MOV
  CSV num_frames        : 303
  CSV n_extracted_frames: 337
  OpenCV recount_frames : 288
  ➡️ diff (recount - csv_num_frames)        = -15
  ➡️ diff (recount - csv_n_extracted_frames) = -49
🎥 Hip Extension2.MOV
  CSV num_frames        : 301
  CSV n_extracted_frames: 353
  OpenCV recount_frames : 293
  ➡️ diff (recount - csv_num_frames)        = -8
  ➡️ diff (recount - csv_n_extracted_frames) = -60
