# Create Dataset and helper functions

In [3]:
import cv2
import os
import shutil
from pathlib import Path

In [3]:
def save_frame(path, frame):
    """
    Saves a frame to an uncompressed PNG image
    :param path: The path (with .png); a str or an os.PathLike such as pathlib.Path
    :param frame: Frame to save (handed unchanged to cv2.imwrite)
    :return: boolean: success
    """
    # Normalise PathLike objects to a plain string first: pathlib.Path has no
    # .endswith(), so the extension check below would otherwise raise.
    path = os.fspath(path)
    if not path.endswith('.png'):
        print('Path does not end with .png')
        return False
    # PNG compression level 0: no compression is requested from the encoder.
    return cv2.imwrite(path, frame, [int(cv2.IMWRITE_PNG_COMPRESSION), 0])

In [4]:
def extract_frames(video_path, output_folder, interval_sec=0.5):
    """
    Extracts frames from video
    :param video_path: The video path
    :param output_folder: The output folder where every frame will be saved
    :param interval_sec: The interval between frames, in seconds. An interval
        shorter than one frame period saves every frame.
    :return: boolean: True when the video was read and every selected frame
        was written; False when the video cannot be opened, reports no usable
        FPS, or at least one frame could not be saved
    """
    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)

    # try/finally guarantees the capture handle is released on every exit
    # path, including exceptions raised while reading or saving.
    try:
        if not cap.isOpened():
            print("Error: Cannot open video.")
            return False

        _, video_name = os.path.split(video_path)
        video_name_without_ext, _ = os.path.splitext(video_name)

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # A reported FPS of 0 (or NaN) would make the duration and the frame
        # step below meaningless, so stop early instead of dividing by zero.
        if not fps > 0:
            print("Error: Video reports an invalid FPS.")
            return False

        duration = total_frames / fps
        print(f"FPS: {fps}, Total Frames: {total_frames}, Duration: {duration:.2f} sec")

        # int() can truncate to 0 when interval_sec is shorter than one frame
        # period; a step of 0 would raise ZeroDivisionError in the modulo.
        frame_interval = int(fps * interval_sec)
        if frame_interval == 0:
            frame_interval = 1

        frame_idx = 0
        saved_count = 0
        failed_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % frame_interval == 0:
                filename = os.path.join(output_folder, video_name_without_ext + f"_{saved_count:04d}.png")
                # Count only frames that were really written so the summary
                # and the file numbering stay truthful and gap-free.
                if save_frame(filename, frame):
                    saved_count += 1
                else:
                    failed_count += 1

            frame_idx += 1
    finally:
        cap.release()

    print(f"Extracted {saved_count} frames.")
    if failed_count:
        print(f"Warning: {failed_count} frames could not be saved.")
    return failed_count == 0

In [5]:
# Sample the test clip every 0.2 s into a folder named after the video.
video_file = os.path.join("test", "250905_GHW_1_HD_1_M0.avi")
frames_dir = os.path.join("test", "250905_GHW_1_HD_1_M0")
extract_frames(video_file, output_folder=frames_dir, interval_sec=0.2)

FPS: 30.0, Total Frames: 255, Duration: 8.50 sec
Extracted 43 frames.


True

In [6]:
def duplicate_file(src, dest):
    """
    Copies a file by delegating to shutil.copyfile
    :param src: Path of the file to copy
    :param dest: Path of the copy to create
    :return: Whatever shutil.copyfile returns (the destination path)
    """
    copied_to = shutil.copyfile(src, dest)
    return copied_to

In [None]:
# Copy the index-0000 mask file to the mask names for indices 0001-0042.
# NOTE(review): the upper bound 43 presumably matches the number of frames
# extracted from this video - confirm before re-running on another clip.
first_mask = "data/eye/train_masks/250905_GHW_1_HD_1_M0_0000_mask.png"
for i in range(1, 43):
    duplicate_file(
        first_mask,
        f"data/eye/train_masks/250905_GHW_1_HD_1_M0_{i:04d}_mask.png",
    )

In [4]:
def rename_files_sequentially(directory: str, extension: str = ".png"):
    """
    Renames all files in a directory to sequential names like 0001.png, 0002.png, ...

    Files are numbered in sorted path order, starting at 1. Only the name
    changes: file contents are not converted to match ``extension``.

    The rename runs in two passes (original name -> unique staging name ->
    final name). Renaming directly could make a target such as 0001.png
    overwrite (or, on Windows, collide with) a file of that name that is
    still waiting for its own rename.

    Args:
        directory (str): Path to the directory containing files.
        extension (str): Desired extension for renamed files (default: ".png").
    """
    dir_path = Path(directory).resolve()

    files = sorted(f for f in dir_path.iterdir() if f.is_file())

    # Build a staging prefix that no existing entry starts with, so the first
    # pass can never land on a name already present in the directory.
    taken_names = {entry.name for entry in dir_path.iterdir()}
    staging_prefix = "__renaming__"
    while any(name.startswith(staging_prefix) for name in taken_names):
        staging_prefix += "_"

    # Pass 1: move every file out of the way under a collision-free name.
    staged_files = []
    for i, file in enumerate(files, start=1):
        staged_path = dir_path / f"{staging_prefix}{i:04d}"
        file.rename(staged_path)
        staged_files.append(staged_path)

    # Pass 2: all original names are free now, so assign the final names.
    for i, staged_path in enumerate(staged_files, start=1):
        new_name = f"{i:04d}{extension}"
        staged_path.rename(dir_path / new_name)

    print(f"Renamed {len(files)} files in {dir_path}")

In [5]:
# Renumber every file in the training folder to 0001.png, 0002.png, ...
rename_files_sequentially(directory="./data/eye/train")

Renamed 804 files in C:\Users\Domodekavkaz\Documents\Stage\papillae_detection\data\eye\train


---

In [9]:
def read_paths(file_path: str):
    """
    Lazily yields the paths listed in a UTF-8 text file, one per line.

    Surrounding whitespace is stripped from each line, blank lines are
    skipped, and every remaining entry is yielded as a resolved path string.

    Args:
        file_path (str): Path to the text file containing one path per line.

    Yields:
        str: The resolved form of each listed path.
    """
    with open(file_path, "r", encoding="utf-8") as listing:
        entries = (raw_line.strip() for raw_line in listing)
        # Empty strings are falsy, which filters out the blank lines.
        yield from (str(Path(entry).resolve()) for entry in entries if entry)

In [10]:
# Copy each image listed in pngs.txt into the training folder under a
# zero-based sequential name (0000.png, 0001.png, ...).
# NOTE(review): duplicate_file goes through shutil.copyfile, which replaces an
# existing destination, so files already stored under these names are overwritten.
for i, p in enumerate(read_paths("pngs.txt")):
    duplicate_file(p, f"data/eye/train/{i:04d}.png")