In [1]:
import os
import shutil
import pandas as pd
from dataclasses import dataclass
from pathlib import Path
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories

In [2]:
%pwd

'd:\\Proyectos\\MLOps\\Chicken_Disease_Classification_MLOps_Project\\research'

In [3]:
# Move from the notebook folder ("research") up to the project root, where the
# relative config/ and artifacts/ paths used below resolve.
# The guard keeps this cell idempotent: re-running it no longer climbs one
# directory higher on every execution.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'd:\\Proyectos\\MLOps\\Chicken_Disease_Classification_MLOps_Project'

In [5]:
@dataclass(frozen=True)
class DataSortingConfig:
    """Immutable settings for the data-sorting step.

    Instances are built by ``ConfigurationManager.get_data_sorting_config``.
    """
    # Root directory of the pipeline artifacts (top-level ``artifacts_root`` key).
    artifacts_root: Path
    # CSV file read by the sorting step; its ``images`` column holds the file names.
    source_csv: Path
    # Directory that receives one sub-folder of images per class.
    output_dir: Path

In [6]:
class ConfigurationManager:
    """Load the YAML configuration and build per-step configuration objects."""

    def __init__(self, config_filepath: Path):
        """Read the YAML file and create the artifacts root directory.

        Args:
            config_filepath: Location of the YAML configuration file.
        """
        self.config = read_yaml(config_filepath)
        artifacts_root = Path(self.config['artifacts_root'])
        create_directories([artifacts_root])

    def get_data_sorting_config(self) -> DataSortingConfig:
        """Create the data-sorting directories and return the step settings.

        Returns:
            A ``DataSortingConfig`` filled from the top-level
            ``artifacts_root`` key and the ``data_sorting`` section.
        """
        section = self.config['data_sorting']

        # Step root first, then the folder that receives the sorted images.
        for dir_key in ('root_dir', 'output_dir'):
            create_directories([Path(section[dir_key])])

        return DataSortingConfig(
            artifacts_root=Path(self.config['artifacts_root']),
            source_csv=Path(section['source_csv']),
            output_dir=Path(section['output_dir']),
        )

In [7]:
class DataSorting:
    """Sort the ingested images into one folder per class and publish them."""

    # Location of the ingested images, relative to ``config.artifacts_root``.
    IMAGE_SUBDIR = ("data_ingestion", "chicken-fecal-images")

    # (filename marker, class folder) pairs, tested in this order.
    # Matching is by substring, so "pcrsalmo..." is covered by "salmo", etc.
    CLASS_MARKERS = (
        ("salmo", "Salmonella"),
        ("cocci", "Coccidiosis"),
        ("ncd", "New_Castle_Disease"),
        ("healthy", "Healthy"),
    )

    @classmethod
    def _class_for(cls, img_filename):
        """Return the class folder for ``img_filename``, or None if no marker matches."""
        for marker, class_name in cls.CLASS_MARKERS:
            if marker in img_filename:
                return class_name
        return None

    def sort_images(self, config: "DataSortingConfig"):
        """Copy every image listed in the CSV into its class folder.

        Reads the ``images`` column of ``config.source_csv``, looks each file
        up under ``<artifacts_root>/data_ingestion/chicken-fecal-images`` and
        copies it to ``<output_dir>/<class>/``. Files with an unrecognized
        name, or missing on disk, are reported and skipped.

        Args:
            config: Object exposing ``source_csv``, ``output_dir`` and
                ``artifacts_root``.

        Raises:
            KeyError: If the CSV has no ``images`` column.
        """
        # Only the file names are needed; the class is derived from the name,
        # so the CSV is not required to carry a ``label`` column.
        filenames = pd.read_csv(config.source_csv)["images"]

        output_dir = Path(config.output_dir)
        images_dir = Path(config.artifacts_root).joinpath(*self.IMAGE_SUBDIR)

        # Create one output folder per class up front.
        for _, class_name in self.CLASS_MARKERS:
            (output_dir / class_name).mkdir(parents=True, exist_ok=True)

        for raw_name in filenames:
            # str() keeps a blank/NaN cell from crashing the substring test;
            # such rows fall through to the "unrecognized" branch.
            img_filename = str(raw_name)

            dest_folder = self._class_for(img_filename)
            if dest_folder is None:
                print(f"Unrecognized image prefix in: {img_filename}")
                continue

            src_path = images_dir / img_filename
            if not src_path.exists():
                print(f"Source image not found: {src_path}")
                continue

            shutil.copy(src_path, output_dir / dest_folder / img_filename)
            print(f"Copied {img_filename} to {dest_folder}")

    @classmethod
    def transfer_and_clean_folders(cls, data_sorting_config):
        """Copy the sorted class folders into the image directory and prune it.

        The class folders under ``output_dir`` are copied to
        ``<artifacts_root>/data_ingestion/chicken-fecal-images``; afterwards
        every other entry of that directory is deleted.

        Declared as a classmethod so it can be called both on the class and on
        an instance.

        Args:
            data_sorting_config: Object exposing ``output_dir`` and
                ``artifacts_root``.
        """
        source_root = Path(data_sorting_config.output_dir)
        # Same location sort_images reads from, derived from the config
        # instead of a hard-coded "artifacts/..." path.
        target_root = Path(data_sorting_config.artifacts_root).joinpath(*cls.IMAGE_SUBDIR)
        target_folders = sorted(class_name for _, class_name in cls.CLASS_MARKERS)

        missing = []
        for folder in target_folders:
            src_folder = source_root / folder
            dest_folder = target_root / folder

            if src_folder.exists():
                shutil.copytree(src_folder, dest_folder, dirs_exist_ok=True)
                print(f"Transferred {src_folder} to {dest_folder}")
            else:
                missing.append(folder)

        # The cleanup below is destructive. If any sorted folder was not
        # transferred, the loose files may be the only copy of those images,
        # so leave the directory untouched.
        if missing:
            print(f"Skipping cleanup of {target_root}; sorted folders not found: {missing}")
            return

        # Snapshot the listing before deleting entries from the directory.
        for item_path in sorted(target_root.iterdir()):
            if item_path.name in target_folders:
                continue
            if item_path.is_dir():
                shutil.rmtree(item_path)
                print(f"Deleted directory: {item_path}")
            else:
                item_path.unlink()
                print(f"Deleted file: {item_path}")

In [8]:
if __name__ == "__main__":
    # Path to config.yaml (CONFIG_FILE_PATH is presumably exported by
    # cnnClassifier.constants via the star import).
    config_filepath = CONFIG_FILE_PATH

    # Build the configuration for the data-sorting step.
    config_manager = ConfigurationManager(config_filepath=config_filepath)
    data_sorting_config = config_manager.get_data_sorting_config()

    # sort_images is a DataSorting method, not a module-level function:
    # calling it as a bare name only works with stale kernel state.
    data_sorter = DataSorting()
    data_sorter.sort_images(data_sorting_config)

    # Invoked through the class: the function lives in DataSorting's body and
    # takes only the configuration object.
    DataSorting.transfer_and_clean_folders(data_sorting_config)

[2024-10-21 17:16:53,284: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-10-21 17:16:53,286: INFO: common: created directory at: artifacts]
[2024-10-21 17:16:53,288: INFO: common: created directory at: artifacts\data_sorting]
[2024-10-21 17:16:53,290: INFO: common: created directory at: artifacts\data_sorting\sorted_images]
Copied salmo.1558.jpg to Salmonella
Copied cocci.1866.jpg to Coccidiosis
Copied cocci.171.jpg to Coccidiosis
Copied salmo.1484.jpg to Salmonella
Copied ncd.100.jpg to New_Castle_Disease
Copied salmo.659.jpg to Salmonella
Copied salmo.1386.jpg to Salmonella
Copied healthy.1748.jpg to Healthy
Copied healthy.1156.jpg to Healthy
Copied healthy.1215.jpg to Healthy
Copied cocci.686.jpg to Coccidiosis
Copied healthy.281.jpg to Healthy
Copied cocci.1966.jpg to Coccidiosis
Copied healthy.1892.jpg to Healthy
Copied salmo.2172.jpg to Salmonella
Copied healthy.1286.jpg to Healthy
Copied salmo.1291.jpg to Salmonella
Copied salmo.1012.jpg to Salmonella
Cop