In [25]:
import os
import csv
from pathlib import Path
from loguru import logger
import cv2
import random
import json

In [30]:
VIDEOS_FOLDER = "videos"
DATASET_FOLDER = "dataset"
IMAGES_FOLDER = f"{DATASET_FOLDER}/images"
METADATA_FILE = f"{DATASET_FOLDER}/metadata.csv"

In [4]:
os.makedirs(IMAGES_FOLDER, exist_ok=True)

In [55]:
with open(METADATA_FILE, mode="w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["image_id", "elapsed_time_seconds"])

In [56]:
for video_file in Path(VIDEOS_FOLDER).glob("*.mp4"):
    cap = cv2.VideoCapture(str(video_file))

    if not cap.isOpened():
        logger.error(f"Failed to open {video_file}")
        continue

    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    video_duration = frame_count / fps

    logger.info(f"Processing {video_file.name}")

    elapsed_time = 0
    log_timer = 0

    while elapsed_time < video_duration:
        random_step = random.randint(1, 10)
        elapsed_time += random_step
        log_timer += random_step

        if elapsed_time > video_duration:
            break

        frame_number = int(elapsed_time * fps)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()

        if not ret:
            logger.error(f"Failed to read frame at {elapsed_time:.2f}s in {video_file.name}")
            break

        image_id = f"{video_file.name}_{int(elapsed_time)}.jpg"
        image_path = os.path.join(IMAGES_FOLDER, image_id)

        cv2.imwrite(image_path, frame)

        with open(METADATA_FILE, mode="a", newline="") as file:
            writer = csv.writer(file)
            writer.writerow([image_id, elapsed_time])

        if log_timer >= 60:
            logger.info(f"Processed {elapsed_time:.2f}s of {video_file.name}")
            log_timer = 0

    cap.release()

[32m2024-12-05 00:09:07.791[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m12[0m - [1mProcessing vid_20241204_221801.mp4[0m
[32m2024-12-05 00:09:08.619[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m43[0m - [1mProcessed 61.00s of vid_20241204_221801.mp4[0m
[32m2024-12-05 00:09:09.424[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m43[0m - [1mProcessed 125.00s of vid_20241204_221801.mp4[0m
[32m2024-12-05 00:09:10.109[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m43[0m - [1mProcessed 190.00s of vid_20241204_221801.mp4[0m
[32m2024-12-05 00:09:10.720[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m43[0m - [1mProcessed 250.00s of vid_20241204_221801.mp4[0m
[32m2024-12-05 00:09:11.290[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m12[0m - [1mProcessing vid_20241204_220438.mp4[0m
[32m2024-12-05 00:09:12.024[0m | [1mINFO    [0m | [36m__main__[0m:[36m<m

In [51]:
DATASET_ID = "idwntl/elapsed-thermal-wheel"

In [53]:
dataset_meta = dict(
    id=DATASET_ID,
    title="Elapsed Thermal Wheel",
    licenses=[{"name": "CC0-1.0"}],
)

with open(os.path.join(DATASET_FOLDER, "dataset-metadata.json"), 'w') as f:
    json.dump(dataset_meta, f)

In [57]:
!kaggle datasets create -p dataset/ -u -r zip

Starting upload for file images.zip
100%|██████████████████████████████████████| 19.6M/19.6M [00:02<00:00, 6.94MB/s]
Upload successful: images.zip (20MB)
Starting upload for file metadata.csv
100%|██████████████████████████████████████| 6.46k/6.46k [00:00<00:00, 16.2kB/s]
Upload successful: metadata.csv (6KB)
Your public Dataset is being created. Please check progress at https://www.kaggle.com/datasets/idwntl/elapsed-thermal-wheel
