In [None]:
# Load libraries
import cv2
import os
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset paths used by the later
# cells all live under this mount point.
drive.mount('/content/drive')

# Show the installed OpenCV version.
print(cv2.__version__)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
4.12.0


video -> frame: 각 영상에서 초당 약 5프레임을 추출해 PNG로 저장

In [None]:
# Folder holding the source .mp4 files; extracted frames are written to a
# "video_to_frame" sub-folder inside it.
base_dir = '/content/drive/MyDrive/Colab Notebooks/data/train_videos (1)/'

output_root = os.path.join(base_dir, "video_to_frame")
os.makedirs(output_root, exist_ok=True)

# Target sampling rate: keep roughly this many frames per second of video.
TARGET_FPS = 5

# os.listdir() returns entries in arbitrary order; sorting keeps the
# processing order and the log identical between runs.
for file in sorted(os.listdir(base_dir)):
    # Case-insensitive match so files named ".MP4" are not silently ignored.
    if not file.lower().endswith(".mp4"):
        continue

    video_path = os.path.join(base_dir, file)
    video_name = os.path.splitext(file)[0]

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"[SKIP] {file}: 영상 열기 실패")
            continue

        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            print(f"[SKIP] {file}: FPS 정보 X")
            continue

        # Keep every `interval`-th frame (at least every frame) to approximate
        # TARGET_FPS frames per second.
        interval = max(1, round(fps / TARGET_FPS))

        frame_idx = 0    # position of the current frame in the video
        sample_idx = 0   # running number used in the output file name
        saved = 0        # frames actually written to disk

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % interval == 0:
                out_path = os.path.join(
                    output_root, f"{video_name}_{sample_idx:04d}.png"
                )
                # cv2.imwrite reports failure through its return value, so
                # only count frames that were really written.
                if cv2.imwrite(out_path, frame):
                    saved += 1
                else:
                    print(f"[WARN] {file}: 프레임 저장 실패 -> {out_path}")
                sample_idx += 1

            frame_idx += 1
    finally:
        # Release the capture on every path, including the [SKIP] branches.
        cap.release()

    print(f"[DONE] {file} -> {saved} frames")

[DONE] abarnvbtwb.mp4 -> 50 frames
[DONE] aagfhgtpmv.mp4 -> 50 frames
[DONE] aapnvogymq.mp4 -> 50 frames
[DONE] abofeumbvv.mp4 -> 50 frames
[DONE] aevrfsexku.mp4 -> 50 frames
[DONE] aelzhcnwgf.mp4 -> 50 frames
[DONE] acxwigylke.mp4 -> 50 frames
[DONE] acqfdwsrhi.mp4 -> 50 frames
[DONE] adylbeequz.mp4 -> 50 frames
[DONE] adhsbajydo.mp4 -> 50 frames
[DONE] agqphdxmwt.mp4 -> 50 frames
[DONE] aelfnikyqj.mp4 -> 50 frames
[DONE] acifjvzvpm.mp4 -> 50 frames
[DONE] aettqgevhz.mp4 -> 50 frames
[DONE] abqwwspghj.mp4 -> 50 frames
[DONE] acxnxvbsxk.mp4 -> 50 frames
[DONE] agrmhtjdlk.mp4 -> 50 frames
[DONE] ahbweevwpv.mp4 -> 50 frames
[DONE] aczrgyricp.mp4 -> 50 frames
[DONE] agdkmztvby.mp4 -> 50 frames
[DONE] afoovlsmtx.mp4 -> 50 frames
[DONE] adohikbdaz.mp4 -> 50 frames
[DONE] altziddtxi.mp4 -> 50 frames
[DONE] amowujxmzc.mp4 -> 50 frames
[DONE] alaijyygdv.mp4 -> 50 frames
[DONE] aladcziidp.mp4 -> 50 frames
[DONE] andaxzscny.mp4 -> 50 frames
[DONE] aknmpoonls.mp4 -> 50 frames
[DONE] akxoopqjqz.mp

프레임용 json 파일 생성

In [None]:
import json

# Path to the per-video metadata JSON stored in the training-video folder.
# NOTE(review): json_path is not read by any cell visible here; the next cell
# builds the same path again as video_json_path — confirm whether this cell is
# still needed.
json_path = "/content/drive/MyDrive/Colab Notebooks/data/train_videos (1)/metadata.json"

In [None]:
import os
import json

# Input/output locations. frame_dir is the folder written by the frame
# extraction cell.
base_dir = "/content/drive/MyDrive/Colab Notebooks/data/train_videos (1)/"
frame_dir = os.path.join(base_dir, "video_to_frame")

video_json_path = os.path.join(base_dir, "metadata.json")
frame_json_path = os.path.join(base_dir, "frame_metadata.json")

# 1) Load the per-video metadata (looked up below by "<stem>.mp4").
with open(video_json_path, "r", encoding="utf-8") as f:
    video_meta = json.load(f)

frame_meta = {}
matched = 0
unmatched = 0

# 2) Walk the extracted frames. os.listdir() returns entries in arbitrary
#    order, so sort to make the generated JSON identical between runs.
for frame_file in sorted(os.listdir(frame_dir)):
    if not frame_file.lower().endswith((".png", ".jpg", ".jpeg")):
        continue

    # emfbhytfhc_0000.png -> emfbhytfhc -> emfbhytfhc.mp4
    video_stem = os.path.splitext(frame_file)[0].rsplit("_", 1)[0]
    video_name = video_stem + ".mp4"

    if video_name not in video_meta:
        unmatched += 1
        continue

    info = video_meta[video_name]
    frame_meta[frame_file] = {
        "label": info["label"],
        "split": info["split"],
        "video": video_name,
        # Written as null when an entry has no "original" key instead of
        # aborting the whole run with a KeyError.
        "original": info.get("original"),
    }
    matched += 1

print("matched:", matched, "unmatched:", unmatched)

# 3) Save the frame-level metadata.
with open(frame_json_path, "w", encoding="utf-8") as f:
    json.dump(frame_meta, f, indent=2, ensure_ascii=False)

print("saved:", frame_json_path, "entries:", len(frame_meta))


matched: 20000 unmatched: 0
saved: /content/drive/MyDrive/Colab Notebooks/data/train_videos (1)/frame_metadata.json entries: 20000


In [None]:
import json

# Sanity check: read the frame-level metadata back and show how many entries it holds.
frame_metadata_file = "/content/drive/MyDrive/Colab Notebooks/data/train_videos (1)/frame_metadata.json"

with open(frame_metadata_file, mode="r", encoding="utf-8") as handle:
    data = json.load(handle)

entry_count = len(data)
print(entry_count)

20000


data sampling -> 전처리 로직 확인용 (영상당 프레임 0001~0013만 복사)

In [None]:
import os
import shutil
from collections import defaultdict

# Destination folder for the sampled frames. base_dir and frame_dir come from
# the earlier metadata cell.
output_dir = os.path.join(base_dir, "selected_frames")

os.makedirs(output_dir, exist_ok=True)

# Group the extracted frame file names by the video they belong to.
video_frames = defaultdict(list)

for file in os.listdir(frame_dir):
    if not file.lower().endswith(".png"):
        continue

    # emfbhytfhc_0007.png -> emfbhytfhc
    video_name = os.path.splitext(file)[0].rsplit("_", 1)[0]
    video_frames[video_name].append(file)

# Frame numbers copied from every video: 0001 .. 0013 (frame 0000 is not taken).
FIRST_FRAME = 1
LAST_FRAME = 13
frames_per_video = LAST_FRAME - FIRST_FRAME + 1

copied = 0
missing = 0

for video in sorted(video_frames):
    # Reuse the listing gathered above instead of hitting the file system
    # once per expected frame.
    available = set(video_frames[video])

    for i in range(FIRST_FRAME, LAST_FRAME + 1):
        target = f"{video}_{i:04d}.png"

        if target not in available:
            # Name the missing frame so short videos can be identified.
            print(f"frame 부족: {target}")
            missing += 1
            continue

        src_path = os.path.join(frame_dir, target)
        dst_path = os.path.join(output_dir, target)
        shutil.copy(src_path, dst_path)
        copied += 1

# Only claim a full copy when no expected frame was missing.
if missing:
    print(f"프레임 복사 완료: {copied}개 복사, {missing}개 누락")
else:
    print(f"각 영상당 {frames_per_video}개 프레임 복사 완료")

각 영상당 13개 프레임 복사 완료


In [None]:
# Build the metadata JSON for the sampled frames only.

frame_json_path = os.path.join(base_dir, "frame_metadata.json")
selected_dir = os.path.join(base_dir, "selected_frames")
output_json_path = os.path.join(base_dir, "selected_frame_metadata.json")

# 1) Load the metadata of all extracted frames.
with open(frame_json_path, "r", encoding="utf-8") as f:
    frame_meta = json.load(f)

# 2) Image files currently present in selected_frames.
selected_files = {
    name for name in os.listdir(selected_dir)
    if name.lower().endswith((".png", ".jpg", ".jpeg"))
}

print("selected files:", len(selected_files))

# 3) Keep only the metadata of the selected frames. Iterating the set directly
#    gives a run-dependent order (string hashing is randomized per process),
#    so sort to make the written JSON reproducible.
selected_meta = {}
missing_meta = []

for fname in sorted(selected_files):
    if fname in frame_meta:
        selected_meta[fname] = frame_meta[fname]
    else:
        missing_meta.append(fname)

# Surface selected files that have no metadata instead of dropping them silently.
if missing_meta:
    print("missing metadata:", len(missing_meta))

# 4) Save the filtered metadata.
with open(output_json_path, "w", encoding="utf-8") as f:
    json.dump(selected_meta, f, indent=2, ensure_ascii=False)

print("saved:", output_json_path)
print("entries:", len(selected_meta))


selected files: 5200
saved: /content/drive/MyDrive/Colab Notebooks/data/train_videos (1)/selected_frame_metadata.json
entries: 5200


In [None]:
# Sanity check: read the sampled-frame metadata back and show how many entries it holds.
selected_metadata_file = "/content/drive/MyDrive/Colab Notebooks/data/train_videos (1)/selected_frame_metadata.json"

with open(selected_metadata_file, mode="r", encoding="utf-8") as handle:
    data = json.load(handle)

entry_count = len(data)
print(entry_count)

5200
