In [None]:
import os
import subprocess
import shutil
from collections import defaultdict

# Mapping of device metadata names to desired naming conventions
device_name_mapping = {
    "Canon EOS 6D": "canon_eos_6d",
    "Canon EOS 6D Mark II": "canon_eos_6d_mark_ii",
    "Canon EOS 80D": "canon_eos_80d",
    "Canon EOS M6": "canon_eos_m6",
    "X-A10": "fujifilm_x_a10",
    "NIKON D7200": "nikon_d7200",
    "DC-TZ90": "panasonic_lumix_dc_tz90",
    "E-M10MarkII": "olympus_e_m10_mark_ii",
    "sd Quattro": "sigma_sd_quattro",
    "ILCA-68": "sony_alpha_68",
    "DSC-RX100": "sony_rx100",
}

# Rules for telling apart cameras that report the same "Model" value.
# model -> (metadata field to inspect, ((marker, device name), ...)).
# Markers are tried in order; the first one contained in the field wins.
_SAME_MODEL_RULES = {
    "X-A10": (
        "InternalSerialNumber",
        (
            ("70057SL59307", "fujifilm_x_a10_1"),
            ("7DC57UL44710", "fujifilm_x_a10_2"),
        ),
    ),
    "DC-TZ90": (
        "InternalSerialNumber",
        (
            ("(XCL)", "panasonic_lumix_dc_tz90_1"),
            ("(XCR)", "panasonic_lumix_dc_tz90_2"),
        ),
    ),
    "DSC-RX100": (
        "Software",
        (
            ("v1.00", "sony_rx100_1"),
            ("v2.00", "sony_rx100_2"),
        ),
    ),
}


# Differentiate between devices with the same model name
def differentiate_device(metadata):
    """
    Map an image's metadata to the device name used in file and folder names.

    Models listed in ``_SAME_MODEL_RULES`` are resolved to a numbered device by
    looking for a known marker inside a second metadata field. When no marker
    matches, or for any other model, ``device_name_mapping`` is consulted.

    Parameters:
    metadata (dict): Metadata of the image file.

    Returns:
    str: Device name, or "unknown_device" when the model is missing or unmapped.
    """
    device = metadata.get("Model", "unknown_device")
    rule = _SAME_MODEL_RULES.get(device)
    if rule is not None:
        field, markers = rule
        # A number-like tag value can arrive as an int/float from the JSON
        # output; coerce to text so the substring test cannot raise TypeError.
        value = str(metadata.get(field, ""))
        for marker, name in markers:
            if marker in value:
                return name
    return device_name_mapping.get(device, "unknown_device")


# Function to read metadata using ExifTool
def read_metadata(file_path, exiftool_path="../tools/executables/exiftool.exe"):
    """
    Read selected metadata tags from an image file using ExifTool.

    ExifTool is run with ``-j`` so it prints a JSON list; the first object in
    that list is returned.

    Parameters:
    file_path (str): Path to the image file.
    exiftool_path (str): ExifTool executable to run. Defaults to the path this
        script has always used.

    Returns:
    dict: Tag name -> value for the file, or an empty dict when ExifTool exits
    with an error or its output is not the expected JSON.
    """
    import json  # local import: this cell's import block does not provide json

    command = [
        exiftool_path,
        "-Model",
        "-ISO",
        "-ExposureTime",
        "-InternalSerialNumber",
        "-Software",
        "-j",
        file_path,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error reading metadata from {file_path}: {e}")
        return {}

    try:
        # Parse as JSON rather than eval(): eval would execute whatever the
        # subprocess printed and fails on the JSON literals true/false/null.
        metadata = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Error parsing metadata for {file_path}: {e}")
        return {}

    if isinstance(metadata, list) and metadata and isinstance(metadata[0], dict):
        return metadata[0]
    return {}


# Function to rename and sort files
def rename_and_sort_files(input_dir, output_dir, scene_number):
    """
    Rename image files from their metadata and move them into the dataset layout.

    For every image found under ``input_dir`` the new base name is
    ``<device>_scene<N>_ISO<iso>_<exposure>``. The first file seen with a given
    base name and extension goes to
    ``<output_dir>/<device>/scene_<N>/AEB/ISO<iso>/<ext>/``; later ones get a
    ``_<k>`` suffix and go to
    ``<output_dir>/<device>/scene_<N>/Burst/ISO<iso>/<exposure>/<ext>/``.

    Parameters:
    input_dir (str): Directory containing the image files to be sorted.
    output_dir (str): Directory to save the sorted image files.
    scene_number (int): Scene number for the current set of images.
    """
    image_extensions = {
        ".jpg",
        ".jpeg",
        ".tif",
        ".tiff",
        ".cr2",
        ".nef",
        ".arw",
        ".rw2",
        ".raf",
        ".x3f",
        ".orf",
    }
    file_count = defaultdict(lambda: defaultdict(int))
    for root, _, files in os.walk(input_dir):
        for file in files:
            # Test the real extension; a bare suffix test such as
            # endswith("jpg") would also accept names like "notes.notjpg".
            dotted_extension = os.path.splitext(file)[1].lower()
            if dotted_extension not in image_extensions:
                continue

            file_path = os.path.join(root, file)
            metadata = read_metadata(file_path)
            device_name = differentiate_device(metadata)
            iso = metadata.get("ISO", "unknown_iso")
            exposure_time = str(
                metadata.get("ExposureTime", "unknown_exposure")
            ).replace("/", "_")

            # Only falsy values (e.g. an ISO of 0) trigger this; absent tags
            # fall back to the truthy "unknown_*" placeholders above.
            if not all([device_name, iso, exposure_time]):
                print(f"Skipping file {file_path} due to missing metadata.")
                continue

            base_name = f"{device_name}_scene{scene_number}_ISO{iso}_{exposure_time}"
            extension = dotted_extension[1:]
            scene_dir = os.path.join(output_dir, device_name, f"scene_{scene_number}")

            # Index 0 is the AEB slot, every later index is a Burst slot. The
            # counter starts from zero on each call, so skip slots already on
            # disk instead of letting shutil.move overwrite an earlier file.
            index = file_count[base_name][extension]
            while True:
                if index == 0:
                    sequence_number = ""
                    new_name = f"{base_name}{sequence_number}.{extension}"
                    new_path = os.path.join(
                        scene_dir, "AEB", f"ISO{iso}", extension, new_name
                    )
                else:
                    sequence_number = f"_{index}"
                    new_name = f"{base_name}{sequence_number}.{extension}"
                    new_path = os.path.join(
                        scene_dir,
                        "Burst",
                        f"ISO{iso}",
                        f"{exposure_time}",
                        extension,
                        new_name,
                    )
                if not os.path.exists(new_path):
                    break
                index += 1
            file_count[base_name][extension] = index + 1

            print(
                f"device_name: {device_name}, iso: {iso}, exposure_time: {exposure_time}, extension: {extension}, sequence_number: {sequence_number}"
            )
            print(f"new_path: {new_path}")

            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            shutil.move(file_path, new_path)
            print(f"Moved {file_path} to {new_path}")


# Example usage: one entry per scene to organise. The keys match the
# parameter names of rename_and_sort_files, so each entry is unpacked
# straight into the call.
scenes = [
    dict(input_dir="", output_dir="", scene_number=1),
    dict(input_dir="", output_dir="", scene_number=1),
]

for scene in scenes:
    rename_and_sort_files(**scene)

# AEB and Burst Organization Script
### Description:
This script reads metadata from image files, differentiates devices based on specific metadata fields, and renames and sorts the files into a structured directory format. It is designed to work with various camera models and handle specific cases where multiple devices share the same model name.

### Usage:
1. In the `scenes` list, set each entry's `input_dir` to the source directory containing that scene's image files.
2. Set each entry's `output_dir` to the destination directory where you want the sorted files to be saved.
3. Set each entry's `scene_number` to the number of the scene being processed.
4. Run the script to rename and sort the files based on their metadata.


In [None]:
import os
import subprocess
import shutil
from collections import defaultdict

# Mapping of device metadata names to desired naming conventions
device_name_mapping = {
    "Canon EOS 6D": "canon_eos_6d",
    "Canon EOS 6D Mark II": "canon_eos_6d_mark_ii",
    "Canon EOS 80D": "canon_eos_80d",
    "Canon EOS M6": "canon_eos_m6",
    "X-A10": "fujifilm_x_a10",
    "NIKON D7200": "nikon_d7200",
    "DC-TZ90": "panasonic_lumix_dc_tz90",
    "E-M10MarkII": "olympus_e_m10_mark_ii",
    "sd Quattro": "sigma_sd_quattro",
    "ILCA-68": "sony_alpha_68",
    "DSC-RX100": "sony_rx100",
}

# Rules for telling apart cameras that report the same "Model" value.
# model -> (metadata field to inspect, ((marker, device name), ...)).
# Markers are tried in order; the first one contained in the field wins.
_SAME_MODEL_RULES = {
    "X-A10": (
        "InternalSerialNumber",
        (
            ("70057SL59307", "fujifilm_x_a10_1"),
            ("7DC57UL44710", "fujifilm_x_a10_2"),
        ),
    ),
    "DC-TZ90": (
        "InternalSerialNumber",
        (
            ("(XCL)", "panasonic_lumix_dc_tz90_1"),
            ("(XCR)", "panasonic_lumix_dc_tz90_2"),
        ),
    ),
    "DSC-RX100": (
        "Software",
        (
            ("v1.00", "sony_rx100_1"),
            ("v2.00", "sony_rx100_2"),
        ),
    ),
}


# Differentiate between devices with the same model name
def differentiate_device(metadata):
    """
    Map an image's metadata to the device name used in file and folder names.

    Models listed in ``_SAME_MODEL_RULES`` are resolved to a numbered device by
    looking for a known marker inside a second metadata field. When no marker
    matches, or for any other model, ``device_name_mapping`` is consulted.

    Parameters:
    metadata (dict): Metadata of the image file.

    Returns:
    str: Device name, or "unknown_device" when the model is missing or unmapped.
    """
    device = metadata.get("Model", "unknown_device")
    rule = _SAME_MODEL_RULES.get(device)
    if rule is not None:
        field, markers = rule
        # A number-like tag value can arrive as an int/float from the JSON
        # output; coerce to text so the substring test cannot raise TypeError.
        value = str(metadata.get(field, ""))
        for marker, name in markers:
            if marker in value:
                return name
    return device_name_mapping.get(device, "unknown_device")


# Function to read metadata using ExifTool
def read_metadata(file_path, exiftool_path="../tools/executables/exiftool.exe"):
    """
    Read selected metadata tags from an image file using ExifTool.

    ExifTool is run with ``-j`` so it prints a JSON list; the first object in
    that list is returned.

    Parameters:
    file_path (str): Path to the image file.
    exiftool_path (str): ExifTool executable to run. Defaults to the path this
        script has always used.

    Returns:
    dict: Tag name -> value for the file, or an empty dict when ExifTool exits
    with an error or its output is not the expected JSON.
    """
    import json  # local import: this cell's import block does not provide json

    command = [
        exiftool_path,
        "-Model",
        "-ISO",
        "-ExposureTime",
        "-InternalSerialNumber",
        "-Software",
        "-j",
        file_path,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error reading metadata from {file_path}: {e}")
        return {}

    try:
        # Parse as JSON rather than eval(): eval would execute whatever the
        # subprocess printed and fails on the JSON literals true/false/null.
        metadata = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Error parsing metadata for {file_path}: {e}")
        return {}

    if isinstance(metadata, list) and metadata and isinstance(metadata[0], dict):
        return metadata[0]
    return {}


# Function to rename and sort files
def rename_and_sort_files(input_dir, output_dir, scene_number):
    """
    Rename image files from their metadata and move them into the dataset layout.

    For every image found under ``input_dir`` the new base name is
    ``<device>_scene<N>_ISO<iso>_<exposure>``. The first file seen with a given
    base name and extension goes to
    ``<output_dir>/<device>/scene_<N>/AEB/ISO<iso>/<ext>/``; later ones get a
    ``_<k>`` suffix and go to
    ``<output_dir>/<device>/scene_<N>/Burst/ISO<iso>/<exposure>/<ext>/``.

    Parameters:
    input_dir (str): Directory containing the image files to be sorted.
    output_dir (str): Directory to save the sorted image files.
    scene_number (int): Scene number for the current set of images.
    """
    image_extensions = {
        ".jpg",
        ".jpeg",
        ".tif",
        ".tiff",
        ".cr2",
        ".nef",
        ".arw",
        ".rw2",
        ".raf",
        ".x3f",
        ".orf",
    }
    file_count = defaultdict(lambda: defaultdict(int))
    for root, _, files in os.walk(input_dir):
        for file in files:
            # Test the real extension; a bare suffix test such as
            # endswith("jpg") would also accept names like "notes.notjpg".
            dotted_extension = os.path.splitext(file)[1].lower()
            if dotted_extension not in image_extensions:
                continue

            file_path = os.path.join(root, file)
            metadata = read_metadata(file_path)
            device_name = differentiate_device(metadata)
            iso = metadata.get("ISO", "unknown_iso")
            exposure_time = str(
                metadata.get("ExposureTime", "unknown_exposure")
            ).replace("/", "_")

            # Only falsy values (e.g. an ISO of 0) trigger this; absent tags
            # fall back to the truthy "unknown_*" placeholders above.
            if not all([device_name, iso, exposure_time]):
                print(f"Skipping file {file_path} due to missing metadata.")
                continue

            base_name = f"{device_name}_scene{scene_number}_ISO{iso}_{exposure_time}"
            extension = dotted_extension[1:]
            scene_dir = os.path.join(output_dir, device_name, f"scene_{scene_number}")

            # Index 0 is the AEB slot, every later index is a Burst slot. The
            # counter starts from zero on each call, so skip slots already on
            # disk instead of letting shutil.move overwrite an earlier file.
            index = file_count[base_name][extension]
            while True:
                if index == 0:
                    sequence_number = ""
                    new_name = f"{base_name}{sequence_number}.{extension}"
                    new_path = os.path.join(
                        scene_dir, "AEB", f"ISO{iso}", extension, new_name
                    )
                else:
                    sequence_number = f"_{index}"
                    new_name = f"{base_name}{sequence_number}.{extension}"
                    new_path = os.path.join(
                        scene_dir,
                        "Burst",
                        f"ISO{iso}",
                        f"{exposure_time}",
                        extension,
                        new_name,
                    )
                if not os.path.exists(new_path):
                    break
                index += 1
            file_count[base_name][extension] = index + 1

            print(
                f"device_name: {device_name}, iso: {iso}, exposure_time: {exposure_time}, extension: {extension}, sequence_number: {sequence_number}"
            )
            print(f"new_path: {new_path}")

            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            shutil.move(file_path, new_path)
            print(f"Moved {file_path} to {new_path}")


# Example usage: one entry per scene to organise. The keys match the
# parameter names of rename_and_sort_files, so each entry is unpacked
# straight into the call.
scenes = [
    dict(
        input_dir="../dataset/unorganized_images/Scene 1",
        output_dir="../dataset/WIFD",
        scene_number=1,
    ),
    dict(
        input_dir="../dataset/Burst Shot/Scene 2",
        output_dir="../dataset/WIFD",
        scene_number=2,
    ),
    # Add more scenes as needed
]

for scene in scenes:
    rename_and_sort_files(**scene)

---
# Reference/SDR Image Organization Script
### Description:
This script reads metadata from image files, differentiates devices based on specific metadata fields, and renames and sorts the files into a structured directory format. The script is designed to work with a variety of camera models and handle specific cases where multiple devices share the same model name.

### Usage:
1. Update the `input_directory` variable to point to your source directory containing the image files.
2. Update the `output_directory` variable to point to the destination directory where you want the sorted files to be saved.
3. Run the script to rename and sort the files based on their metadata.

### Reference Images

In [None]:
import os
import subprocess
import shutil
from collections import defaultdict

# Mapping of device metadata names to desired naming conventions
device_name_mapping = {
    "Canon EOS 6D": "canon_eos_6d",
    "Canon EOS 6D Mark II": "canon_eos_6d_mark_ii",
    "Canon EOS 80D": "canon_eos_80d",
    "Canon EOS M6": "canon_eos_m6",
    "X-A10": "fujifilm_x_a10",
    "NIKON D7200": "nikon_d7200",
    "DC-TZ90": "panasonic_lumix_dc_tz90",
    "E-M10MarkII": "olympus_e_m10_mark_ii",
    "sd Quattro": "sigma_sd_quattro",
    "ILCA-68": "sony_alpha_68",
    "DSC-RX100": "sony_rx100",
}

# Rules for telling apart cameras that report the same "Model" value.
# model -> (metadata field to inspect, ((marker, device name), ...)).
# Markers are tried in order; the first one contained in the field wins.
_SAME_MODEL_RULES = {
    "X-A10": (
        "InternalSerialNumber",
        (
            ("70057SL59307", "fujifilm_x_a10_1"),
            ("7DC57UL44710", "fujifilm_x_a10_2"),
        ),
    ),
    "DC-TZ90": (
        "InternalSerialNumber",
        (
            ("(XCL)", "panasonic_lumix_dc_tz90_1"),
            ("(XCR)", "panasonic_lumix_dc_tz90_2"),
        ),
    ),
    "DSC-RX100": (
        "Software",
        (
            ("v1.00", "sony_rx100_1"),
            ("v2.00", "sony_rx100_2"),
        ),
    ),
}


def differentiate_device(metadata):
    """
    Map an image's metadata to the device name used in file and folder names.

    Models listed in ``_SAME_MODEL_RULES`` are resolved to a numbered device by
    looking for a known marker inside a second metadata field. When no marker
    matches, or for any other model, ``device_name_mapping`` is consulted.

    Parameters:
    metadata (dict): Metadata of the image file.

    Returns:
    str: Device name, or "unknown_device" when the model is missing or unmapped.
    """
    device = metadata.get("Model", "unknown_device")
    rule = _SAME_MODEL_RULES.get(device)
    if rule is not None:
        field, markers = rule
        # A number-like tag value can arrive as an int/float from the JSON
        # output; coerce to text so the substring test cannot raise TypeError.
        value = str(metadata.get(field, ""))
        for marker, name in markers:
            if marker in value:
                return name
    return device_name_mapping.get(device, "unknown_device")


def read_metadata(file_path, exiftool_path="../tools/executables/exiftool.exe"):
    """
    Read selected metadata tags from an image file using ExifTool.

    ExifTool is run with ``-j`` so it prints a JSON list; the first object in
    that list is returned.

    Parameters:
    file_path (str): Path to the image file.
    exiftool_path (str): ExifTool executable to run. Defaults to the path this
        script has always used.

    Returns:
    dict: Tag name -> value for the file, or an empty dict when ExifTool exits
    with an error or its output is not the expected JSON.
    """
    import json  # local import: this cell's import block does not provide json

    command = [
        exiftool_path,
        "-Model",
        "-ISO",
        "-ExposureTime",
        "-FNumber",
        "-InternalSerialNumber",
        "-Software",
        "-j",
        file_path,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error reading metadata from {file_path}: {e}")
        return {}

    try:
        # Parse as JSON rather than eval(): eval would execute whatever the
        # subprocess printed and fails on the JSON literals true/false/null.
        metadata = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Error parsing metadata for {file_path}: {e}")
        return {}

    if isinstance(metadata, list) and metadata and isinstance(metadata[0], dict):
        return metadata[0]
    return {}


def rename_and_sort_files(input_dir, output_dir):
    """
    Rename reference images from their metadata and move them per device.

    Every file found under ``input_dir`` (no extension filter is applied) is
    moved to ``<output_dir>/<device>/reference/`` and named
    ``<device>_reference_ISO<iso>_<exposure>_<fnumber>_<k>.<ext>``, where ``k``
    is the first sequence number not already taken at the destination.

    Parameters:
    input_dir (str): Directory containing the image files to be sorted.
    output_dir (str): Directory to save the sorted image files.
    """
    file_count = defaultdict(lambda: defaultdict(int))
    for root, _, files in os.walk(input_dir):
        for file in files:
            file_path = os.path.join(root, file)
            metadata = read_metadata(file_path)
            device_name = differentiate_device(metadata)
            iso = metadata.get("ISO", "unknown_iso")
            exposure_time = str(
                metadata.get("ExposureTime", "unknown_exposure")
            ).replace("/", "_")
            fnumber = metadata.get("FNumber", "unknown_fnumber")

            # An aperture of 0 is falsy; turn it into text so the check below
            # does not treat it as missing.
            if fnumber == "0" or fnumber == 0:
                fnumber = "0.0"

            if not all([device_name, iso, exposure_time, fnumber]):
                print(f"{device_name}, {iso}, {exposure_time}, {fnumber}")
                print(f"Skipping file {file_path} due to missing metadata.")
                continue

            base_name = f"{device_name}_reference_ISO{iso}_{exposure_time}_{fnumber}"
            extension = os.path.splitext(file)[1].lower().replace(".", "")
            target_dir = os.path.join(output_dir, device_name, "reference")

            # The counter restarts at 1 on every call, so keep advancing it
            # until the name is free; otherwise shutil.move would silently
            # overwrite an image placed by an earlier run.
            while True:
                file_count[base_name][extension] += 1
                sequence_number = f"_{file_count[base_name][extension]}"
                new_name = f"{base_name}{sequence_number}.{extension}"
                new_path = os.path.join(target_dir, new_name)
                if not os.path.exists(new_path):
                    break

            print(f"Moving {file_path} to {new_path}")
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            shutil.move(file_path, new_path)


# Example usage: fill in both paths before running this cell.
input_directory = ""  # Update this path to your input directory
output_directory = ""  # Update this path to your output directory

rename_and_sort_files(input_dir=input_directory, output_dir=output_directory)

### SDR Images

In [None]:
import os
import subprocess
import shutil
from collections import defaultdict

# Mapping of device metadata names to desired naming conventions
device_name_mapping = {
    "Canon EOS 6D": "canon_eos_6d",
    "Canon EOS 6D Mark II": "canon_eos_6d_mark_ii",
    "Canon EOS 80D": "canon_eos_80d",
    "Canon EOS M6": "canon_eos_m6",
    "X-A10": "fujifilm_x_a10",
    "NIKON D7200": "nikon_d7200",
    "DC-TZ90": "panasonic_lumix_dc_tz90",
    "E-M10MarkII": "olympus_e_m10_mark_ii",
    "sd Quattro": "sigma_sd_quattro",
    "ILCA-68": "sony_alpha_68",
    "DSC-RX100": "sony_rx100",
}

# Rules for telling apart cameras that report the same "Model" value.
# model -> (metadata field to inspect, ((marker, device name), ...)).
# Markers are tried in order; the first one contained in the field wins.
_SAME_MODEL_RULES = {
    "X-A10": (
        "InternalSerialNumber",
        (
            ("70057SL59307", "fujifilm_x_a10_1"),
            ("7DC57UL44710", "fujifilm_x_a10_2"),
        ),
    ),
    "DC-TZ90": (
        "InternalSerialNumber",
        (
            ("(XCL)", "panasonic_lumix_dc_tz90_1"),
            ("(XCR)", "panasonic_lumix_dc_tz90_2"),
        ),
    ),
    "DSC-RX100": (
        "Software",
        (
            ("v1.00", "sony_rx100_1"),
            ("v2.00", "sony_rx100_2"),
        ),
    ),
}


def differentiate_device(metadata):
    """
    Map an image's metadata to the device name used in file and folder names.

    Models listed in ``_SAME_MODEL_RULES`` are resolved to a numbered device by
    looking for a known marker inside a second metadata field. When no marker
    matches, or for any other model, ``device_name_mapping`` is consulted.

    Parameters:
    metadata (dict): Metadata of the image file.

    Returns:
    str: Device name, or "unknown_device" when the model is missing or unmapped.
    """
    device = metadata.get("Model", "unknown_device")
    rule = _SAME_MODEL_RULES.get(device)
    if rule is not None:
        field, markers = rule
        # A number-like tag value can arrive as an int/float from the JSON
        # output; coerce to text so the substring test cannot raise TypeError.
        value = str(metadata.get(field, ""))
        for marker, name in markers:
            if marker in value:
                return name
    return device_name_mapping.get(device, "unknown_device")


def read_metadata(file_path, exiftool_path="../tools/executables/exiftool.exe"):
    """
    Read selected metadata tags from an image file using ExifTool.

    ExifTool is run with ``-j`` so it prints a JSON list; the first object in
    that list is returned.

    Parameters:
    file_path (str): Path to the image file.
    exiftool_path (str): ExifTool executable to run. Defaults to the path this
        script has always used.

    Returns:
    dict: Tag name -> value for the file, or an empty dict when ExifTool exits
    with an error or its output is not the expected JSON.
    """
    import json  # local import: this cell's import block does not provide json

    command = [
        exiftool_path,
        "-Model",
        "-ISO",
        "-ExposureTime",
        "-FNumber",
        "-InternalSerialNumber",
        "-Software",
        "-j",
        file_path,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error reading metadata from {file_path}: {e}")
        return {}

    try:
        # Parse as JSON rather than eval(): eval would execute whatever the
        # subprocess printed and fails on the JSON literals true/false/null.
        metadata = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Error parsing metadata for {file_path}: {e}")
        return {}

    if isinstance(metadata, list) and metadata and isinstance(metadata[0], dict):
        return metadata[0]
    return {}


def rename_and_sort_files(input_dir, output_dir):
    """
    Rename SDR images from their metadata and move them per device.

    Every file found under ``input_dir`` (no extension filter is applied) is
    moved to ``<output_dir>/<device>/sdr_image/`` and named
    ``<device>_sdr_image_ISO<iso>_<exposure>_<fnumber>_<k>.<ext>``, where ``k``
    is the first sequence number not already taken at the destination.

    Parameters:
    input_dir (str): Directory containing the image files to be sorted.
    output_dir (str): Directory to save the sorted image files.
    """
    file_count = defaultdict(lambda: defaultdict(int))
    for root, _, files in os.walk(input_dir):
        for file in files:
            file_path = os.path.join(root, file)
            metadata = read_metadata(file_path)
            device_name = differentiate_device(metadata)
            iso = metadata.get("ISO", "unknown_iso")
            exposure_time = str(
                metadata.get("ExposureTime", "unknown_exposure")
            ).replace("/", "_")
            fnumber = metadata.get("FNumber", "unknown_fnumber")

            # An aperture of 0 is falsy; turn it into text so the check below
            # does not treat it as missing.
            if fnumber == "0" or fnumber == 0:
                fnumber = "0.0"

            if not all([device_name, iso, exposure_time, fnumber]):
                print(f"{device_name}, {iso}, {exposure_time}, {fnumber}")
                print(f"Skipping file {file_path} due to missing metadata.")
                continue

            base_name = f"{device_name}_sdr_image_ISO{iso}_{exposure_time}_{fnumber}"
            extension = os.path.splitext(file)[1].lower().replace(".", "")
            target_dir = os.path.join(output_dir, device_name, "sdr_image")

            # The counter restarts at 1 on every call, so keep advancing it
            # until the name is free; otherwise shutil.move would silently
            # overwrite an image placed by an earlier run.
            while True:
                file_count[base_name][extension] += 1
                sequence_number = f"_{file_count[base_name][extension]}"
                new_name = f"{base_name}{sequence_number}.{extension}"
                new_path = os.path.join(target_dir, new_name)
                if not os.path.exists(new_path):
                    break

            print(f"Moving {file_path} to {new_path}")
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            shutil.move(file_path, new_path)


# Example usage: fill in both paths before running this cell.
input_directory = ""  # Update this path to your input directory
output_directory = ""  # Update this path to your output directory

rename_and_sort_files(input_dir=input_directory, output_dir=output_directory)

In [None]:
import os
import subprocess
import shutil
from collections import defaultdict

# Mapping of device metadata names to desired naming conventions
device_name_mapping = {
    "Canon EOS 6D": "canon_eos_6d",
    "Canon EOS 6D Mark II": "canon_eos_6d_mark_ii",
    "Canon EOS 80D": "canon_eos_80d",
    "Canon EOS M6": "canon_eos_m6",
    "X-A10": "fujifilm_x_a10",
    "NIKON D7200": "nikon_d7200",
    "DC-TZ90": "panasonic_lumix_dc_tz90",
    "E-M10MarkII": "olympus_e_m10_mark_ii",
    "sd Quattro": "sigma_sd_quattro",
    "ILCA-68": "sony_alpha_68",
    "DSC-RX100": "sony_rx100",
}

# Rules for telling apart cameras that report the same "Model" value.
# model -> (metadata field to inspect, ((marker, device name), ...)).
# Markers are tried in order; the first one contained in the field wins.
_SAME_MODEL_RULES = {
    "X-A10": (
        "InternalSerialNumber",
        (
            ("70057SL59307", "fujifilm_x_a10_1"),
            ("7DC57UL44710", "fujifilm_x_a10_2"),
        ),
    ),
    "DC-TZ90": (
        "InternalSerialNumber",
        (
            ("(XCL)", "panasonic_lumix_dc_tz90_1"),
            ("(XCR)", "panasonic_lumix_dc_tz90_2"),
        ),
    ),
    "DSC-RX100": (
        "Software",
        (
            ("v1.00", "sony_rx100_1"),
            ("v2.00", "sony_rx100_2"),
        ),
    ),
}


def differentiate_device(metadata):
    """
    Map an image's metadata to the device name used in file and folder names.

    Models listed in ``_SAME_MODEL_RULES`` are resolved to a numbered device by
    looking for a known marker inside a second metadata field. When no marker
    matches, or for any other model, ``device_name_mapping`` is consulted.

    Parameters:
    metadata (dict): Metadata of the image file.

    Returns:
    str: Device name, or "unknown_device" when the model is missing or unmapped.
    """
    device = metadata.get("Model", "unknown_device")
    rule = _SAME_MODEL_RULES.get(device)
    if rule is not None:
        field, markers = rule
        # A number-like tag value can arrive as an int/float from the JSON
        # output; coerce to text so the substring test cannot raise TypeError.
        value = str(metadata.get(field, ""))
        for marker, name in markers:
            if marker in value:
                return name
    return device_name_mapping.get(device, "unknown_device")


def read_metadata(file_path, exiftool_path="../tools/executables/exiftool.exe"):
    """
    Read selected metadata tags from an image file using ExifTool.

    ExifTool is run with ``-j`` so it prints a JSON list; the first object in
    that list is returned.

    Parameters:
    file_path (str): Path to the image file.
    exiftool_path (str): ExifTool executable to run. Defaults to the path this
        script has always used.

    Returns:
    dict: Tag name -> value for the file, or an empty dict when ExifTool exits
    with an error or its output is not the expected JSON.
    """
    import json  # local import: this cell's import block does not provide json

    command = [
        exiftool_path,
        "-Model",
        "-ISO",
        "-ExposureTime",
        "-FNumber",
        "-InternalSerialNumber",
        "-Software",
        "-j",
        file_path,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error reading metadata from {file_path}: {e}")
        return {}

    try:
        # Parse as JSON rather than eval(): eval would execute whatever the
        # subprocess printed and fails on the JSON literals true/false/null.
        metadata = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Error parsing metadata for {file_path}: {e}")
        return {}

    if isinstance(metadata, list) and metadata and isinstance(metadata[0], dict):
        return metadata[0]
    return {}


def rename_and_sort_files(input_dir, output_dir):
    """
    Rename SDR images from their metadata and move them per device.

    Every file found under ``input_dir`` (no extension filter is applied) is
    moved to ``<output_dir>/<device>/sdr_image/`` and named
    ``<device>_sdr_image_ISO<iso>_<exposure>_<fnumber>_<k>.<ext>``, where ``k``
    is the first sequence number not already taken at the destination.

    Parameters:
    input_dir (str): Directory containing the image files to be sorted.
    output_dir (str): Directory to save the sorted image files.
    """
    file_count = defaultdict(lambda: defaultdict(int))
    for root, _, files in os.walk(input_dir):
        for file in files:
            file_path = os.path.join(root, file)
            metadata = read_metadata(file_path)
            device_name = differentiate_device(metadata)
            iso = metadata.get("ISO", "unknown_iso")
            exposure_time = str(
                metadata.get("ExposureTime", "unknown_exposure")
            ).replace("/", "_")
            fnumber = metadata.get("FNumber", "unknown_fnumber")

            # An aperture of 0 is falsy; turn it into text so the check below
            # does not treat it as missing.
            if fnumber == "0" or fnumber == 0:
                fnumber = "0.0"

            if not all([device_name, iso, exposure_time, fnumber]):
                print(f"{device_name}, {iso}, {exposure_time}, {fnumber}")
                print(f"Skipping file {file_path} due to missing metadata.")
                continue

            base_name = f"{device_name}_sdr_image_ISO{iso}_{exposure_time}_{fnumber}"
            extension = os.path.splitext(file)[1].lower().replace(".", "")
            target_dir = os.path.join(output_dir, device_name, "sdr_image")

            # The counter restarts at 1 on every call, so keep advancing it
            # until the name is free; otherwise shutil.move would silently
            # overwrite an image placed by an earlier run.
            while True:
                file_count[base_name][extension] += 1
                sequence_number = f"_{file_count[base_name][extension]}"
                new_name = f"{base_name}{sequence_number}.{extension}"
                new_path = os.path.join(target_dir, new_name)
                if not os.path.exists(new_path):
                    break

            print(f"Moving {file_path} to {new_path}")
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            shutil.move(file_path, new_path)


# Example usage: fill in both paths before running this cell.
input_directory = ""
output_directory = ""

rename_and_sort_files(input_dir=input_directory, output_dir=output_directory)

---
# DataHerb Metadata Generation

### Description:
This script generates metadata for the "Warwick Image Forensics Dataset," capturing essential information about the dataset structure, file types, and key attributes. The metadata is saved in YAML format, facilitating integration with data management tools like DataHerb.

### Usage:
1. Update the `dataset_root` variable to point to your dataset directory.
2. Run the script to generate the `metadata.yml` file.
3. The generated YAML file will contain detailed metadata about the dataset, including information about the different types of images and their attributes.


In [None]:
import os
import yaml


def get_directory_size_and_count(path):
    """
    Calculate the total size (in GB) and the total count of files in a directory.

    The directory is walked recursively. Entries whose size cannot be read
    (for example dangling symbolic links, or files removed while the walk is
    in progress) are left out of both totals instead of aborting the scan.

    Parameters:
    path (str): Path to the directory.

    Returns:
    tuple: Total size in GB (1 GB = 1024**3 bytes), total count of files.
    """
    total_size = 0
    total_count = 0
    for dirpath, _, filenames in os.walk(path):
        for filename in filenames:
            try:
                total_size += os.path.getsize(os.path.join(dirpath, filename))
            except OSError:
                continue
            total_count += 1
    total_size_gb = total_size / (1024**3)
    return total_size_gb, total_count


# Descriptions of the attributes listed under each data entry's "fields".
_FIELD_DESCRIPTIONS = {
    "Camera_ID": "Identifier for the camera used to capture the image.",
    "ISO_Setting": "The ISO setting used when capturing the image.",
    "Exposure_Time": "The exposure time for each image.",
    "FNumber": "The FNumber (aperture value) used when capturing the image.",
    "Sequence_Number": "The sequence number for multiple images with the same settings.",
}

# (path keyword, data type name, description, field names).
# Rows are checked in order and the first matching keyword wins.
_IMAGE_CATEGORIES = (
    (
        "Burst",
        "Burst Images",
        "Images captured under HDR exposure settings.",
        ("Camera_ID", "ISO_Setting", "Exposure_Time", "Sequence_Number"),
    ),
    (
        "AEB",
        "AEB Images",
        "Images captured under HDR exposure settings.",
        ("Camera_ID", "ISO_Setting", "Exposure_Time"),
    ),
    (
        "Reference",
        "Reference Images",
        "Images captured under reference exposure settings.",
        ("Camera_ID", "ISO_Setting", "Exposure_Time", "FNumber", "Sequence_Number"),
    ),
    (
        "SDR",
        "SDR Images",
        "Images captured under SDR exposure settings.",
        ("Camera_ID", "ISO_Setting", "Exposure_Time", "FNumber", "Sequence_Number"),
    ),
)


def _classify_directory(root, dataset_root):
    """Return the ``_IMAGE_CATEGORIES`` row matching a directory, or None."""
    # Match on the path inside the dataset, ignoring case, so that lowercase
    # folder names such as "reference" or "sdr_image" are recognised too.
    relative = os.path.relpath(root, dataset_root).lower()
    for category in _IMAGE_CATEGORIES:
        if category[0].lower() in relative:
            return category
    # Fall back to the exact-case keyword anywhere in the full path.
    for category in _IMAGE_CATEGORIES:
        if category[0] in root:
            return category
    return None


def generate_dataherb_metadata(dataset_root, output_path):
    """
    Generate metadata for the Warwick Image Forensics Dataset in YAML format.

    Every directory under ``dataset_root`` that directly contains files and
    whose path matches one of the image categories (Burst, AEB, Reference,
    SDR) becomes one entry in the ``data`` list. Each entry's size and count
    come from ``get_directory_size_and_count`` for that directory.

    Parameters:
    dataset_root (str): Root directory of the dataset.
    output_path (str): Path to save the output YAML file. Missing parent
        directories are created.
    """
    dataherb_metadata = {
        "name": "Warwick Image Forensics Dataset",
        "description": (
            "This dataset features over 58,600 images captured using 14 different digital cameras "
            "with various exposure settings, specifically designed to advance research in device "
            "fingerprinting and multimedia forensics. It addresses the challenges posed by high "
            "dynamic range (HDR) imaging and multi-frame photography algorithms."
        ),
        "contributors": [
            {"name": "Yijun Quan"},
            {"name": "Chang-Tsun Li"},
            {"name": "Yujue Zhou"},
            {"name": "Li Li"},
            {"name": "Luke Collins"},
        ],
        "data": [],
        "license": [
            {"name": "MIT License", "link": "https://opensource.org/licenses/MIT"}
        ],
        "references": [
            {
                "name": "Warwick Image Forensics Dataset for Device Fingerprinting in Multimedia Forensics",
                "link": "https://ieeexplore.ieee.org/abstract/document/9102783",
            }
        ],
    }

    for root, _, files in os.walk(dataset_root):
        if not files:
            continue

        category = _classify_directory(root, dataset_root)
        if category is None:
            continue
        _, data_type, description, field_names = category

        # Calculate size and count for the current directory
        size_gb, count = get_directory_size_and_count(root)

        # Fresh dicts per entry: shared objects would make yaml.dump emit
        # anchors/aliases instead of plain repeated blocks.
        fields = [
            {"name": field_name, "description": _FIELD_DESCRIPTIONS[field_name]}
            for field_name in field_names
        ]

        dataherb_metadata["data"].append(
            {
                "name": data_type,
                "description": description,
                "path": root,
                "format": "various",
                "size": f"{size_gb:.2f} GB",
                "count": count,
                "fields": fields,
            }
        )

    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # allow_unicode=True writes non-ASCII characters verbatim, so the file
    # must be opened as UTF-8 rather than with the platform default encoding.
    with open(output_path, "w", encoding="utf-8") as file:
        yaml.dump(
            dataherb_metadata,
            file,
            sort_keys=False,
            default_flow_style=False,
            allow_unicode=True,
        )


# Example usage: point dataset_root at the organised dataset, then run.
dataset_root = "../dataset/"  # Update this path to your dataset directory
output_file_path = "../.dataherb/metadata.yml"

# Walk the dataset and write the DataHerb YAML description.
generate_dataherb_metadata(dataset_root=dataset_root, output_path=output_file_path)

print(f"Dataherb file generated and saved to {output_file_path}")