In [1]:
import os
# Work from the project root so relative paths (e.g. config/config.yaml) resolve.
# Set the HOWLER_PROJECT_ROOT environment variable to run this notebook from a
# different checkout; when it is unset the original location is used.
os.chdir(os.environ.get("HOWLER_PROJECT_ROOT", "/home/guidomainardi/Howler-cross"))

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataSelectorConfig:
    """Immutable settings for the data-selection step.

    Holds the source and destination folders for images and labels, plus the
    target number of images to select. Instances are frozen, so fields cannot
    be reassigned after construction.
    """
    # Folder the candidate images are read from.
    src_images_folder: Path
    # Folder the candidate labels are read from.
    src_labels_folder: Path
    # Folder the selected images are written to.
    dest_images_folder: Path
    # Folder the selected labels are written to.
    dest_labels_folder: Path
    # Target number of images (with their labels) to select.
    n_images: int

In [3]:
from HowlerMonkey.constants import *
from HowlerMonkey.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds typed config objects from them."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH
    ):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML file.
            params_filepath: Path of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root is created up front, as soon as the manager exists.
        create_directories([self.config.artifacts_root])

    def get_data_selector_config(self) -> DataSelectorConfig:
        """Build the DataSelectorConfig from the `data_selector` and `kfold` sections.

        Source and destination folders are the k-fold images/labels roots joined
        with the selector's source/destination folder names.

        Returns:
            A DataSelectorConfig with the four folder paths and the image count
            converted to int.
        """
        selector_section = self.config.data_selector
        kfold_section = self.config.kfold

        # Values are read in the same order the folders are assembled below.
        images_root = Path(kfold_section.images_path)
        src_name = Path(selector_section.src_folder_name)
        labels_root = Path(kfold_section.labels_path)
        dest_name = Path(selector_section.dest_folder_name)

        return DataSelectorConfig(
            src_images_folder=images_root / src_name,
            src_labels_folder=labels_root / src_name,
            dest_images_folder=images_root / dest_name,
            dest_labels_folder=labels_root / dest_name,
            n_images=int(selector_section.n_images),
        )

In [9]:
import os
import shutil

from HowlerMonkey.utils.common import clear_and_create_folder
from HowlerMonkey import logger

In [10]:
class DataSelector:
    """Copies an evenly spaced subset of image/label files from the source folders to the destination folders."""

    def __init__(self, config: "DataSelectorConfig") -> None:
        """Store the configuration naming the folders and the target image count."""
        self.config = config

    @staticmethod
    def _evenly_spaced_indices(total: int, count: int) -> list:
        """Return min(count, total) distinct, increasing indices spread evenly over range(total)."""
        count = min(count, total)
        # Integer scaling keeps the indices evenly spread and never yields more
        # than `count` of them (a fixed step of total // count can overshoot).
        return [position * total // count for position in range(count)]

    def get_equally_spaced_images(self) -> None:
        """Copy at most `n_images` evenly spaced images and their labels to the destination folders.

        Files in both source folders are sorted by name and paired by position,
        so the i-th image is copied together with the i-th label. The
        destination folders are reset through `clear_and_create_folder` before
        copying. When fewer than `n_images` files exist, all of them are copied.

        Raises:
            ValueError: If `n_images` is not positive, or if the source folders
                contain a different number of files (positional pairing would
                then match images with the wrong labels).
        """
        cfg = self.config

        image_files = sorted(os.listdir(cfg.src_images_folder))
        label_files = sorted(os.listdir(cfg.src_labels_folder))

        # Validate before touching the destination so bad input never wipes it.
        if cfg.n_images <= 0:
            raise ValueError(f"n_images must be a positive integer, got {cfg.n_images}")
        if len(image_files) != len(label_files):
            raise ValueError(
                f"Cannot pair images with labels by position: found "
                f"{len(image_files)} images in {cfg.src_images_folder} but "
                f"{len(label_files)} labels in {cfg.src_labels_folder}"
            )

        selected_indices = self._evenly_spaced_indices(len(image_files), cfg.n_images)

        clear_and_create_folder(
            cfg.dest_images_folder,
            cfg.dest_labels_folder
        )

        for idx in selected_indices:
            image_file = image_files[idx]
            label_file = label_files[idx]

            shutil.copy(
                cfg.src_images_folder / image_file,
                cfg.dest_images_folder / image_file
            )
            shutil.copy(
                cfg.src_labels_folder / label_file,
                cfg.dest_labels_folder / label_file
            )

        logger.info(f"Selected {len(selected_indices)} images and labels.")

In [11]:
# Run the selection step end to end: load the configuration, build the
# selector from it, and copy the evenly spaced subset.
try:
    config = ConfigurationManager()
    data_selector = DataSelector(config.get_data_selector_config())
    data_selector.get_equally_spaced_images()
except Exception:
    # Bare `raise` re-raises the active exception as-is instead of rebinding it.
    raise

[2024-09-03 11:45:39,279: INFO: common] yaml file: config/config.yaml loaded successfully
[2024-09-03 11:45:39,282: INFO: common] yaml file: params.yaml loaded successfully
[2024-09-03 11:45:39,283: INFO: common] Creating directory: artifacts
[2024-09-03 11:45:39,331: INFO: common] Clearing main folder
[2024-09-03 11:45:39,333: INFO: common] Creating main folder
[2024-09-03 11:45:40,581: INFO: 3742319382] Selected 5009 images and labels.
