In [56]:
import numpy as np
import os

def load_data(*path: str, labels: list) -> np.ndarray:
    """
    Lists the entries of one or more folders and pairs each one with the label of its folder.

    Each path is matched positionally with one entry of `labels`. Only the
    direct entries of a folder are listed (no recursion), and every entry
    produces one row `[joined_path, label]`.

    Parameters:
    -----------
    path: str -> Path to the data folder. (can be multiple paths)
    labels: list -> List of labels for each path, in the same order as the paths.

    Returns:
    --------
    data_array: np.ndarray -> String array of shape (n_entries, 2) with the path and label
                              of each entry. Labels are converted to their string form
                              (e.g. 0 -> '0'). The shape is (0, 2) when nothing is found.

    Raises:
    -------
    ValueError -> If the number of paths and labels differ.
    FileNotFoundError -> If one of the paths does not exist.
    """

    # Guard clause: every path needs exactly one label.
    if len(path) != len(labels):
        raise ValueError("Number of paths and labels must be equal")

    # Rows are gathered in a plain list and converted once at the end;
    # np.append copies the whole array on every call, which makes a
    # per-file append quadratic in the number of files.
    rows = []

    for folder, label in zip(path, labels):

        # A missing folder is an error, not an empty contribution.
        if not os.path.exists(folder):
            raise FileNotFoundError("Path does not exist")

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # resulting dataset reproducible between runs and machines.
        for entry in sorted(os.listdir(folder)):
            rows.append([os.path.join(folder, entry), label])

    # dtype=str stores paths and labels uniformly as text, and the reshape
    # guarantees the two-column layout even when no rows were collected.
    return np.array(rows, dtype=str).reshape(-1, 2)

In [61]:
# Build the dataset from the two image folders: label 0 for the fire
# images and label 1 for the forest images.
dataset = load_data(
    './Data/fire/fire/fire-images/',
    './Data/fire/fire/forest-images/',
    labels=[0, 1],
)

# Count the rows per class. The label column is compared against the
# strings '0' and '1' because the array holds the labels as text.
fire_count = sum(1 for row in dataset if row[1] == '0')
forest_count = sum(1 for row in dataset if row[1] == '1')

# Summary of the loaded dataset.
print(f"dataset len: {len(dataset)}")
print(f"dataset shape: {dataset.shape}")
print(f"dataset fire images: {fire_count}")
print(f"dataset forest images: {forest_count}")

dataset len: 5162
dataset shape: (5162, 2)
dataset fire images: 2500
dataset forest images: 2662
