In [2]:
import os, shutil, glob

# New small dataset folder
base = "ATCC_dataset"

folders = [
    "images/train",
    "images/val",
    "labels/train",
    "labels/val"
]

for f in folders:
    os.makedirs(os.path.join(base, f), exist_ok=True)

print("Small dataset folder created.")

source_img = "ATCC_dataset/images/train"
source_lab = "ATCC_dataset/labels/train"

images = glob.glob(source_img + "/*.jpg")
images = images[:400]   # only 400 images

# 350 train, 50 val
for img_path in images[:350]:
    filename = os.path.basename(img_path)
    label = filename.replace(".jpg", ".txt")

    shutil.copy(img_path, f"{base}/images/train/{filename}")
    shutil.copy(f"{source_lab}/{label}", f"{base}/labels/train/{label}")

for img_path in images[350:400]:
    filename = os.path.basename(img_path)
    label = filename.replace(".jpg", ".txt")

    shutil.copy(img_path, f"{base}/images/val/{filename}")
    shutil.copy(f"{source_lab}/{label}", f"{base}/labels/val/{label}")

print("Copied 350 train + 50 val images for small dataset!")



Small dataset folder created.
Copied 350 train + 50 val images for small dataset!


In [3]:
import cv2
import os
from pathlib import Path

VIDEO_DIR = Path("BDDA/training/camera_videos")   # where BDDA videos are
OUT_DIR = Path("bdda_frames")                     # output folder
OUT_DIR.mkdir(exist_ok=True)

FRAME_INTERVAL = 15   # save every 15th frame
MAX_FRAMES = 600      # total frames to save

def main():
    total_saved = 0

    videos = sorted(VIDEO_DIR.glob("*.mp4"))
    if not videos:
        print("‚ùå No videos found in BDDA folder.")
        return

    print(f"üé¨ Found {len(videos)} BDDA videos. Extracting frames...")

    for video in videos:
        if total_saved >= MAX_FRAMES:
            break

        cap = cv2.VideoCapture(str(video))
        if not cap.isOpened():
            continue

        frame_id = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_id % FRAME_INTERVAL == 0:
                save_path = OUT_DIR / f"{video.stem}_frame_{frame_id:05d}.jpg"
                cv2.imwrite(str(save_path), frame)
                total_saved += 1

                if total_saved % 50 == 0:
                    print(f"Saved {total_saved} frames...")

                if total_saved >= MAX_FRAMES:
                    break

            frame_id += 1

        cap.release()

    print(f"‚úÖ DONE. Total frames saved: {total_saved}")

if __name__ == "__main__":
    main()


üé¨ Found 926 BDDA videos. Extracting frames...
Saved 50 frames...
Saved 100 frames...
Saved 150 frames...
Saved 200 frames...
Saved 250 frames...
Saved 300 frames...
Saved 350 frames...
Saved 400 frames...
Saved 450 frames...
Saved 500 frames...
Saved 550 frames...
Saved 600 frames...
‚úÖ DONE. Total frames saved: 600


In [4]:

import os, shutil, glob
from pathlib import Path

BASE = Path("ATCC_dataset")
FRAMES = Path("bdda_frames")

for folder in ["images/train", "images/val", "labels/train", "labels/val"]:
    (BASE / folder).mkdir(parents=True, exist_ok=True)

# Clear existing dataset
for folder in ["images/train", "images/val", "labels/train", "labels/val"]:
    for f in (BASE / folder).glob("*"):
        f.unlink()

imgs = sorted(FRAMES.glob("*.jpg"))
if len(imgs) == 0:
    raise RuntimeError("‚ùå No frames found! Run extract_bdda_frames.py first.")

imgs = imgs[:400]  # limit to 400 images
train_imgs = imgs[:350]
val_imgs = imgs[350:]

for img in train_imgs:
    shutil.copy(img, BASE / "images/train" / img.name)

for img in val_imgs:
    shutil.copy(img, BASE / "images/val" / img.name)

print("‚úÖ Dataset created: 350 train images, 50 val images.")


‚úÖ Dataset created: 350 train images, 50 val images.
