In [6]:
import os
import shutil
import glob

def NemoProcess(nemo_path, output_path, test_split=0.1):
    """
    Process the Nemo dataset into YOLO format with images and labels structured for train, val, and test sets.

    Args:
        nemo_path (str): Path to the root of the Nemo dataset.
        output_path (str): Path where the YOLO formatted dataset will be stored.
        test_split (float): Fraction of the data to use as a test set.

    Returns:
        None
    """

    # Define paths
    train_img_path = os.path.join(nemo_path, 'train', 'img')
    train_ann_path = os.path.join(nemo_path, 'train', 'ann')
    val_img_path = os.path.join(nemo_path, 'val', 'img')
    val_ann_path = os.path.join(nemo_path, 'val', 'ann')

    # Create YOLO directory structure
    for split in ['train', 'val', 'test']:
        os.makedirs(os.path.join(output_path, 'images', split), exist_ok=True)
        os.makedirs(os.path.join(output_path, 'labels', split), exist_ok=True)

    # Copy and split the dataset
    def process_split(img_path, ann_path, split):
        images = glob.glob(os.path.join(img_path, "*.jpg"))  # Assuming images are in .jpg format
        labels = glob.glob(os.path.join(ann_path, "*.json"))

        for img_file, label_file in zip(sorted(images), sorted(labels)):
            img_name = os.path.basename(img_file)
            label_name = os.path.basename(label_file)

            shutil.copy(img_file, os.path.join(output_path, 'images', split, img_name))
            shutil.copy(label_file, os.path.join(output_path, 'labels', split, label_name))

    # Process train and validation sets
    process_split(train_img_path, train_ann_path, 'train')
    process_split(val_img_path, val_ann_path, 'val')

    # Optionally, move a portion of validation to the test set
    val_images = os.listdir(os.path.join(output_path, 'images', 'val'))
    val_labels = os.listdir(os.path.join(output_path, 'labels', 'val'))
    
    num_test = int(len(val_images) * test_split)
    test_images = val_images[:num_test]
    test_labels = val_labels[:num_test]
    
    for test_img, test_lbl in zip(test_images, test_labels):
        shutil.move(os.path.join(output_path, 'images', 'val', test_img), os.path.join(output_path, 'images', 'test', test_img))
        shutil.move(os.path.join(output_path, 'labels', 'val', test_lbl), os.path.join(output_path, 'labels', 'test', test_lbl))

   
    print(f"YOLO format dataset created at {output_path}")



In [7]:
NemoProcess(
    nemo_path=r'C:\nico\wildfire2024\data\Nemo',
    output_path=r'C:\nico\wildfire2024\data\test',
    test_split=0.1
)

2684
250
YOLO format dataset created at C:\nico\wildfire2024\data\test


In [15]:
import os
import shutil
import glob
import json

def process_labels_nemo(json_path, txt_output_path):
    """
    Convert label data from JSON format to YOLO format.

    Args:
        json_path (str): Path to the JSON file containing the label information.
        txt_output_path (str): Path to save the converted YOLO format label (.txt) file.

    Returns:
        None
    """

    with open(json_path, 'r') as json_file:
        data = json.load(json_file)

    image_width = data['size']['width']
    image_height = data['size']['height']

    yolo_labels = []

    for obj in data['objects']:
        if obj['geometryType'] == 'rectangle':
            class_id = 0  # Since we only have one class: smoke

            # Extract the exterior points
            x_min, y_min = obj['points']['exterior'][0]
            x_max, y_max = obj['points']['exterior'][1]

            # Convert to YOLO format (x_center, y_center, width, height)
            x_center = (x_min + x_max) / 2 / image_width
            y_center = (y_min + y_max) / 2 / image_height
            width = (x_max - x_min) / image_width
            height = (y_max - y_min) / image_height

            yolo_labels.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

    # Write the labels to a .txt file
    with open(txt_output_path, 'w') as txt_file:
        for label in yolo_labels:
            txt_file.write(label + '\n')

def NemoProcess2(nemo_path, output_path, test_split=0.1):
    """
    Process the Nemo dataset into YOLO format with images and labels structured for train, val, and test sets.

    Args:
        nemo_path (str): Path to the root of the Nemo dataset.
        output_path (str): Path where the YOLO formatted dataset will be stored.
        test_split (float): Fraction of the data to use as a test set.

    Returns:
        None
    """

    # Define paths
    train_img_path = os.path.join(nemo_path, 'train', 'img')
    train_ann_path = os.path.join(nemo_path, 'train', 'ann')
    val_img_path = os.path.join(nemo_path, 'val', 'img')
    val_ann_path = os.path.join(nemo_path, 'val', 'ann')

    # Create YOLO directory structure
    for split in ['train', 'val', 'test']:
        os.makedirs(os.path.join(output_path, 'images', split), exist_ok=True)
        os.makedirs(os.path.join(output_path, 'labels', split), exist_ok=True)

    # Copy and split the dataset
    def process_split(img_path, ann_path, split):
        images = glob.glob(os.path.join(img_path, "*.jpg"))  # Assuming images are in .jpg format
        labels = glob.glob(os.path.join(ann_path, "*.json"))

        for img_file, json_file in zip(sorted(images), sorted(labels)):
            img_name = os.path.basename(img_file)
            txt_output_name = img_name.replace('.jpg', '.txt')
            txt_output_path = os.path.join(output_path, 'labels', split, txt_output_name)

            # Process the JSON label file
            process_labels_nemo(json_file, txt_output_path)

            shutil.copy(img_file, os.path.join(output_path, 'images', split, img_name))

    # Process train and validation sets
    process_split(train_img_path, train_ann_path, 'train')
    process_split(val_img_path, val_ann_path, 'val')

    # Optionally, move a portion of validation to the test set
    train_images = os.listdir(os.path.join(output_path, 'images', 'train'))
    train_labels = os.listdir(os.path.join(output_path, 'labels', 'train'))
    
    num_test = int(len(train_images) * test_split)
    test_images = train_images[:num_test]
    test_labels = train_labels[:num_test]
    
    for test_img, test_lbl in zip(test_images, test_labels):
        shutil.move(os.path.join(output_path, 'images', 'train', test_img), os.path.join(output_path, 'images', 'test', test_img))
        shutil.move(os.path.join(output_path, 'labels', 'train', test_lbl), os.path.join(output_path, 'labels', 'test', test_lbl))
    # print len of train, val and test
    print(f"Train: {len(os.listdir(os.path.join(output_path, 'images', 'train')))}")
    print(f"Val: {len(os.listdir(os.path.join(output_path, 'images', 'val')))}")
    print(f"Test: {len(os.listdir(os.path.join(output_path, 'images', 'test')))}")
    print(f"YOLO format dataset created at {output_path}")



In [17]:
NemoProcess2(
    nemo_path=r'C:\nico\wildfire2024\data\Nemo',
    output_path=r'C:\nico\wildfire2024\data\NemoFixed',
    test_split=0.1
)

Train: 2416
Val: 250
Test: 268
YOLO format dataset created at C:\nico\wildfire2024\data\NemoFixed


## figlib


In [21]:
import os
import shutil
import glob
import json
import random

def process_labels(json_path, txt_output_path):
    """
    Convert label data from JSON format to YOLO format.

    Args:
        json_path (str): Path to the JSON file containing the label information.
        txt_output_path (str): Path to save the converted YOLO format label (.txt) file.

    Returns:
        None
    """

    with open(json_path, 'r') as json_file:
        data = json.load(json_file)

    image_width = data['size']['width']
    image_height = data['size']['height']

    yolo_labels = []

    for obj in data['objects']:
        if obj['geometryType'] == 'rectangle':
            class_id = 0  # Since we only have one class: smoke

            # Extract the exterior points
            x_min, y_min = obj['points']['exterior'][0]
            x_max, y_max = obj['points']['exterior'][1]

            # Convert to YOLO format (x_center, y_center, width, height)
            x_center = (x_min + x_max) / 2 / image_width
            y_center = (y_min + y_max) / 2 / image_height
            width = (x_max - x_min) / image_width
            height = (y_max - y_min) / image_height

            yolo_labels.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

    # Write the labels to a .txt file
    with open(txt_output_path, 'w') as txt_file:
        for label in yolo_labels:
            txt_file.write(label + '\n')

def FigLibProcess(figlib_path, output_path, val_split=0.1, test_split=0.1):
    """
    Process the FigLib dataset into YOLO format with images and labels structured for train, val, and test sets.

    Args:
        figlib_path (str): Path to the root of the FigLib dataset.
        output_path (str): Path where the YOLO formatted dataset will be stored.
        val_split (float): Fraction of the data to use as a validation set.
        test_split (float): Fraction of the data to use as a test set.

    Returns:
        None
    """

    # Create YOLO directory structure
    for split in ['train', 'val', 'test']:
        os.makedirs(os.path.join(output_path, 'images', split), exist_ok=True)
        os.makedirs(os.path.join(output_path, 'labels', split), exist_ok=True)

    # Gather all image-label pairs across all fire events
    all_images = []
    all_labels = []

    for event_folder in os.listdir(figlib_path):
        event_path = os.path.join(figlib_path, event_folder)
        if os.path.isdir(event_path):
            img_path = os.path.join(event_path, 'images')
            ann_path = os.path.join(event_path, 'labels')

            images = glob.glob(os.path.join(img_path, "*.jpg"))  # Assuming images are in .jpg format
            labels = glob.glob(os.path.join(ann_path, "*.txt"))

            all_images.extend(images)
            all_labels.extend(labels)

    # Shuffle the dataset
    combined = list(zip(all_images, all_labels))
    random.shuffle(combined)
    all_images[:], all_labels[:] = zip(*combined)

    # Split the dataset
    num_total = len(all_images)
    num_test = int(num_total * test_split)
    num_val = int(num_total * val_split)

    test_images = all_images[:num_test]
    test_labels = all_labels[:num_test]

    val_images = all_images[num_test:num_test + num_val]
    val_labels = all_labels[num_test:num_test + num_val]

    train_images = all_images[num_test + num_val:]
    train_labels = all_labels[num_test + num_val:]

    def process_split(images, labels, split):
        for img_file, json_file in zip(images, labels):
            img_name = os.path.basename(img_file)
            txt_output_name = img_name.replace('.jpg', '.txt')
            txt_output_path = os.path.join(output_path, 'labels', split, txt_output_name)

            # Process the JSON label file
            process_labels(json_file, txt_output_path)

            shutil.copy(img_file, os.path.join(output_path, 'images', split, img_name))

    # Process each split
    process_split(train_images, train_labels, 'train')
    process_split(val_images, val_labels, 'val')
    process_split(test_images, test_labels, 'test')

    print(f"YOLO format dataset created at {output_path}")




In [29]:
import os
import shutil
import glob
import random

def FigLibProcess(figlib_path, output_path, val_split=0.1, test_split=0.1):
    """
    Process the FigLib dataset into YOLO format with images and labels structured for train, val, and test sets.

    Args:
        figlib_path (str): Path to the root of the FigLib dataset.
        output_path (str): Path where the YOLO formatted dataset will be stored.
        val_split (float): Fraction of the data to use as a validation set.
        test_split (float): Fraction of the data to use as a test set.

    Returns:
        None
    """

    # Create YOLO directory structure
    for split in ['train', 'val', 'test']:
        os.makedirs(os.path.join(output_path, 'images', split), exist_ok=True)
        os.makedirs(os.path.join(output_path, 'labels', split), exist_ok=True)

    # Gather all image-label pairs across all fire events
    all_images = []
    all_labels = []

    for event_folder in os.listdir(figlib_path):
        event_path = os.path.join(figlib_path, event_folder)
        if os.path.isdir(event_path):
            # img_path = os.path.join(event_path, 'images')
            ann_path = os.path.join(event_path, 'labels')

            images = glob.glob(os.path.join(event_path, "*.jpg"))  # Assuming images are in .jpg format
            labels = glob.glob(os.path.join(ann_path, "*.txt"))  # Assuming labels are in .txt format

            all_images.extend(images)
            all_labels.extend(labels)
    print(f"Total images: {len(all_images)}")
    print(f"Total labels: {len(all_labels)}")
    # Shuffle the dataset
    combined = list(zip(all_images, all_labels))
    random.shuffle(combined)
    all_images[:], all_labels[:] = zip(*combined)

    # Split the dataset
    num_total = len(all_images)
    num_test = int(num_total * test_split)
    num_val = int(num_total * val_split)

    test_images = all_images[:num_test]
    test_labels = all_labels[:num_test]

    val_images = all_images[num_test:num_test + num_val]
    val_labels = all_labels[num_test:num_test + num_val]

    train_images = all_images[num_test + num_val:]
    train_labels = all_labels[num_test + num_val:]

    def process_split(images, labels, split):
        for img_file, label_file in zip(images, labels):
            img_name = os.path.basename(img_file)
            label_name = os.path.basename(label_file)

            shutil.copy(img_file, os.path.join(output_path, 'images', split, img_name))
            shutil.copy(label_file, os.path.join(output_path, 'labels', split, label_name))

    # Process each split
    process_split(train_images, train_labels, 'train')
    process_split(val_images, val_labels, 'val')
    process_split(test_images, test_labels, 'test')

    print(f"YOLO format dataset created at {output_path}")



In [30]:
figlib_path = r"C:\nico\wildfire2024\data\Fig_Lib\FIGLIB_ANNOTATED_RESIZED"  # Path to the root of the extracted FigLib dataset
output_path = r"C:\nico\wildfire2024\data\testfig"  # Path where you want to store the YOLO formatted dataset

FigLibProcess(figlib_path, output_path)

Total images: 24758
Total labels: 24758
YOLO format dataset created at C:\nico\wildfire2024\data\testfig


## DS-71c1fd51-v2

In [37]:
import os
import shutil
import random

def create_test_split_with_copy(dataset_path, new_dataset_path, test_split=0.1):
    """
    Create a test split by copying the entire dataset to a new location,
    then moving a portion of the train set to a new test set. Also, create
    empty .txt files for images that lack corresponding labels.

    Args:
        dataset_path (str): Path to the original dataset with 'train' and 'val' directories.
        new_dataset_path (str): Path to the new dataset location.
        test_split (float): Fraction of the train set to use as a test set.

    Returns:
        None
    """

    # Copy the entire dataset to a new location
    if os.path.exists(new_dataset_path):
        print(f"Removing existing directory at {new_dataset_path}")
        shutil.rmtree(new_dataset_path)
    shutil.copytree(dataset_path, new_dataset_path)
    print(f"Copied dataset from {dataset_path} to {new_dataset_path}")

    # Define paths in the new dataset
    train_img_path = os.path.join(new_dataset_path, 'images', 'train')
    train_label_path = os.path.join(new_dataset_path, 'labels', 'train')

    val_img_path = os.path.join(new_dataset_path, 'images', 'val')
    val_label_path = os.path.join(new_dataset_path, 'labels', 'val')

    test_img_path = os.path.join(new_dataset_path, 'images', 'test')
    test_label_path = os.path.join(new_dataset_path, 'labels', 'test')

    # Create test directories if they don't exist
    os.makedirs(test_img_path, exist_ok=True)
    os.makedirs(test_label_path, exist_ok=True)

    # Ensure all images have corresponding labels
    def ensure_labels_exist(image_dir, label_dir):
        images = sorted(os.listdir(image_dir))
        labels = set(os.listdir(label_dir))

        for img_name in images:
            label_name = img_name.replace('.jpg', '.txt')  # Assuming the images are in .jpg format
            if label_name not in labels:
                # Create an empty label file if it doesn't exist
                open(os.path.join(label_dir, label_name), 'w').close()

    # Ensure labels exist in train and val sets
    ensure_labels_exist(train_img_path, train_label_path)
    ensure_labels_exist(val_img_path, val_label_path)

    # List all images and corresponding labels
    train_images = sorted(os.listdir(train_img_path))
    train_labels = sorted(os.listdir(train_label_path))

    # Shuffle and split the dataset
    combined = list(zip(train_images, train_labels))
    random.shuffle(combined)
    train_images[:], train_labels[:] = zip(*combined)

    num_test = int(len(train_images) * test_split)

    test_images = train_images[:num_test]
    test_labels = train_labels[:num_test]

    # Move the selected images and labels to the test set
    for img_name, label_name in zip(test_images, test_labels):
        shutil.move(os.path.join(train_img_path, img_name), os.path.join(test_img_path, img_name))
        shutil.move(os.path.join(train_label_path, label_name), os.path.join(test_label_path, label_name))

    print(f"Moved {num_test} images and labels to the test set.")

    # Ensure labels exist in the test set
    ensure_labels_exist(test_img_path, test_label_path)

    print(f"Completed creating the test set and ensuring all images have labels.")





In [38]:
dataset_path = r"C:\nico\wildfire2024\data\DS-71c1fd51-v2"  # Path to the original dataset
new_dataset_path = r"C:\nico\wildfire2024\data\DS-71c1fd51-v2-Fixed"  # Path to the new dataset copy

create_test_split_with_copy(dataset_path, new_dataset_path, test_split=0.1)

Copied dataset from C:\nico\wildfire2024\data\DS-71c1fd51-v2 to C:\nico\wildfire2024\data\DS-71c1fd51-v2-Fixed
Moved 670 images and labels to the test set.
Completed creating the test set and ensuring all images have labels.


In [1]:
import os

# Rutas a las carpetas que contienen los archivos txt y jpg
carpeta_txt = r'C:\Users\corpa\Downloads\pyronear_ds_03_2024_extraido\pyronear_ds_03_2024\labels\val'
carpeta_jpg = r'C:\Users\corpa\Downloads\pyronear_ds_03_2024_extraido\pyronear_ds_03_2024\images\val'

# Crear conjuntos con los nombres de archivo sin la extensión
nombres_txt = {archivo.split('.')[0] for archivo in os.listdir(carpeta_txt) if archivo.endswith('.txt')}
nombres_jpg = {archivo.split('.')[0] for archivo in os.listdir(carpeta_jpg) if archivo.endswith('.jpg')}

# Encontrar nombres de jpg que no tienen un correspondiente archivo txt
jpg_sin_txt = nombres_jpg - nombres_txt

# Crear una lista de los nombres de archivos jpg que no tienen txt correspondiente
archivos_jpg_sin_txt = [nombre + '.jpg' for nombre in jpg_sin_txt]

print(archivos_jpg_sin_txt)


['pyronear_marguerite_2_2023_08_28T05_41_51.jpg', 'pyronear_brison_4_2023_11_02T10_55_20.jpg', 'pyronear_courmettes_1_2023_11_07T06_31_00.jpg', 'pyronear_salaunes_1_4_2023_08_30T08_08_30.jpg', 'AWF_axis-bryantmtn1_2023_06_04T06_54_58.jpg', 'pyronear_courmettes_4_2023_11_06T14_43_01.jpg', 'awf_nvseismolab_noaaX00101_2019_07_05T13_46_05.jpg', 'pyronear_salaunes_1_1_2023_09_02T18_44_37.jpg', 'AWF_axis-mtharrison_2023_06_04T18_20_03.jpg', 'pyronear_marguerite_4_2023_10_25T08_29_52.jpg', 'pyronear_valbonne_3_2023_11_02T16_34_18.jpg', 'pyronear_cabanelle_3_2023_10_14T17_06_48.jpg', 'pyronear_cabanelle_3_2023_10_05T16_37_42.jpg', 'pyronear_marguerite_1_2023_10_30T12_49_32.jpg', 'pyronear_ferion_4_2023_10_29T15_07_44.jpg', 'pyronear_courmettes_3_2023_10_21T06_23_31.jpg', 'pyronear_brison_3_2023_09_09T07_04_13.jpg', 'pyronear_brison_3_2023_10_28T11_40_49.jpg', 'Pyronear_brison_3_2023_06_06T05_04_26.jpg', 'AWF_axis-ellamtn2_2023_06_04T05_00_25.jpg', 'AWF_axis-riddle_2023_06_04T09_56_18.jpg', 'py

In [5]:
len(nombres_jpg)- len(nombres_txt)

170