In [None]:
# Mount Google Drive at /content/drive; the data paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import json
import random
from pathlib import Path
from tqdm import tqdm
import cv2

import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch

In [None]:
# Inputs: directory of selected frames and the JSON file holding their metadata.
RAW_FRAME_DIR = "/content/drive/MyDrive/train_videos/selected_frames"
FRAME_META_PATH = "/content/drive/MyDrive/train_videos/selected_frame_metadata.json"

# Outputs: processed images and one metadata JSON per frame, both under OUT_BASE.
OUT_BASE = "/content/drive/MyDrive/selected_data"
IMG_DIR, META_DIR = (os.path.join(OUT_BASE, sub) for sub in ("images", "meta"))

# exist_ok=True makes this cell safe to re-run.
for out_dir in (IMG_DIR, META_DIR):
    os.makedirs(out_dir, exist_ok=True)

print("저장 경로:", OUT_BASE)

저장 경로: /content/drive/MyDrive/selected_data


In [None]:
# Gather every entry in RAW_FRAME_DIR whose extension (case-insensitive) is a
# known image suffix, in sorted path order.
image_suffixes = {".png", ".jpg", ".jpeg"}
frame_paths = [
    entry
    for entry in sorted(Path(RAW_FRAME_DIR).iterdir())
    if entry.suffix.lower() in image_suffixes
]

print("전체 frame 수:", len(frame_paths))

전체 frame 수: 5200


In [None]:
# Parse the metadata JSON; it is looked up by frame file name in the processing loop.
frame_metadata = json.loads(Path(FRAME_META_PATH).read_text())

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

# Resize-and-crop pipeline applied to every frame before it is saved.

# Step 1: resize so the shorter side becomes 224 px, keeping the aspect ratio.
# NOTE(review): nearest-neighbour interpolation can alias when shrinking images;
# confirm this is intended rather than e.g. cv2.INTER_AREA.
resize_short_side = A.SmallestMaxSize(
    max_size=224,
    interpolation=cv2.INTER_NEAREST,
)

# Step 2: take a 224x224 crop from the centre.
center_crop = A.CenterCrop(height=224, width=224)

# Step 3: convert to a tensor (no normalization is applied).
to_tensor = ToTensorV2()

preprocessor = A.Compose(
    [resize_short_side, center_crop, to_tensor],
    p=1.0,
    is_check_shapes=False,
)

In [None]:
# Sanity check: report whether each output directory is present.
for label, directory in (("IMG_DIR", IMG_DIR), ("META_DIR", META_DIR)):
    print(f"{label} exists:", os.path.exists(directory))

IMG_DIR exists: True
META_DIR exists: True


In [None]:
# Preprocess a random sample of frames and save, per frame, a JPEG under IMG_DIR
# and a metadata JSON under META_DIR (both named after the frame's base name).
num_samples = 5200  # Upper bound on frames to process; lower it for a quick trial run
sampled_frames = random.sample(frame_paths, min(num_samples, len(frame_paths)))

unreadable_frames = []  # inputs that cv2.imread could not decode
failed_writes = []      # frames whose output image could not be written

for p in tqdm(sampled_frames, desc="Processing & saving"):
    fname = p.name                              # e.g. bchnbulevv__001.png
    base = os.path.splitext(fname)[0]           # e.g. bchnbulevv__001

    img = cv2.imread(str(p))
    if img is None:
        # cv2.imread reports failure by returning None; skip the frame but keep a record.
        unreadable_frames.append(fname)
        continue

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    tensor = preprocessor(image=img)["image"]

    # Move axis 0 (the channel axis produced by ToTensorV2) to the end for OpenCV.
    out_img = tensor.permute(1, 2, 0).numpy()
    out_img = out_img.astype("uint8")            # values are stored as-is (no *255 rescale)

    img_path = os.path.join(IMG_DIR, base + ".jpg")
    if not cv2.imwrite(img_path, cv2.cvtColor(out_img, cv2.COLOR_RGB2BGR)):
        # Do not emit metadata for an image that was never saved, so the two
        # output directories stay in sync.
        failed_writes.append(fname)
        continue

    # Build a fresh dict instead of update()-ing the entry held by frame_metadata,
    # so the loaded metadata is not modified as a side effect of this cell.
    meta = {
        **frame_metadata.get(fname, {}),
        "frame_name": fname,
        "video_id": fname.split("__")[0],
    }

    json_path = os.path.join(META_DIR, base + ".json")
    with open(json_path, "w") as f:
        json.dump(meta, f, indent=2)

# Only report when something went wrong, so a clean run prints nothing extra.
if unreadable_frames or failed_writes:
    print(
        f"Skipped {len(unreadable_frames)} unreadable frame(s); "
        f"{len(failed_writes)} image write(s) failed."
    )

Processing & saving: 100%|██████████| 5200/5200 [1:23:39<00:00,  1.04it/s]


In [None]:
# Report how many entries each output directory now contains.
for caption, directory in (("저장된 이미지 수:", IMG_DIR), ("저장된 JSON 수:", META_DIR)):
    print(caption, len(os.listdir(directory)))

저장된 이미지 수: 5200
저장된 JSON 수: 5200
