In [8]:
import os

# Run every later cell from the repository root so the relative paths used
# below (config/config.yaml, params.yaml, artifacts/, datasets/) resolve.
# Defaults to the original dev-container location; set the HOWLER_ROOT
# environment variable to run the notebook from a checkout elsewhere.
os.chdir(os.environ.get("HOWLER_ROOT", "/workspaces/Howler"))

In [9]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class KFoldConfig:
    """Immutable settings for the K-fold split stage.

    Instances are built by ``ConfigurationManager.get_kfold_config`` and
    consumed by ``KFoldCreator``.
    """
    # Output directory of the K-fold stage.
    root_dir: Path
    # Random seed for the shuffled split (passed to KFold as random_state).
    seed: int
    # Number of folds (passed to KFold as n_splits).
    folds: int
    # Destination JSON file for the per-fold train/validation indices.
    fold_file: Path
    # Image dataset root; JPEG files are collected from its 'main' subdirectory.
    images_path: Path


In [10]:
from HowlerMonkey.constants import *
from HowlerMonkey.utils.common import read_yaml, create_directories

In [11]:
class ConfigurationManager:
    """Load the project's YAML settings and turn them into typed config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH
    ):
        """Read both YAML files and ensure the artifacts root directory exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Every stage writes beneath this directory, so create it up front.
        artifacts_dir = self.config.artifacts_root
        create_directories([artifacts_dir])

    def get_kfold_config(self) -> KFoldConfig:
        """Build the ``KFoldConfig`` from the ``kfold`` section of the config.

        The stage's root directory is created as a side effect.

        Returns:
            KFoldConfig: Settings for the K-fold stage; ``fold_file`` is
            resolved relative to the stage's ``root_dir``.
        """
        kfold_section = self.config.kfold
        create_directories([kfold_section.root_dir])

        stage_root = Path(kfold_section.root_dir)
        return KFoldConfig(
            root_dir=stage_root,
            seed=kfold_section.seed,
            folds=kfold_section.folds,
            fold_file=stage_root / kfold_section.fold_file,
            images_path=Path(kfold_section.images_path),
        )

In [12]:
import json
import glob
import numpy as np
from sklearn.model_selection import KFold


from HowlerMonkey import logger

In [13]:
class KFoldCreator:
    """Split the image set into K folds and save the index assignment as JSON.

    Typical use is ``create_folder()`` -> ``get_images_paths()`` ->
    ``create_kfolds()``. ``create_kfolds()`` performs the two preparation
    steps itself when they were skipped.
    """

    def __init__(self, config: KFoldConfig):
        """Store the stage configuration; nothing is read or written yet."""
        self.config = config
        # Populated by create_folder() / get_images_paths().
        self.kf = None
        self.images_paths = None

    def create_folder(self):
        """Build the seeded, shuffling ``KFold`` splitter and keep it on ``self.kf``.

        Despite its name (kept for backward compatibility) this method does
        not create a directory.
        """
        logger.info(f"Creating KFold folder at {self.config.root_dir}")

        self.kf = KFold(
            n_splits=self.config.folds,
            shuffle=True,
            random_state=self.config.seed,
        )

    def get_images_paths(self):
        """Collect the ``*.jpg`` files under ``<images_path>/main``.

        The list is sorted: ``glob`` returns entries in arbitrary,
        filesystem-dependent order, and the saved fold file stores only
        positions in this list. Without a stable order the same seed would
        map to different images on different machines.
        """
        logger.info(f"Getting images paths from {self.config.images_path}")
        pattern = str(self.config.images_path / 'main' / '*.jpg')
        self.images_paths = sorted(glob.glob(pattern))

    def create_kfolds(self):
        """Compute the folds and write them to ``config.fold_file``.

        The JSON file holds a list with one entry per fold:
        ``{'fold': int, 'train_indices': [...], 'val_indices': [...]}``.
        Indices refer to positions in the sorted ``self.images_paths`` list,
        so consumers must sort the same glob to map them back to files.

        Raises:
            ValueError: If fewer images were found than folds requested.
        """
        # Allow calling this method directly without the preparation steps.
        if self.kf is None:
            self.create_folder()
        if self.images_paths is None:
            self.get_images_paths()

        n_images = len(self.images_paths)
        if n_images < self.config.folds:
            # KFold.split would raise ValueError here as well; fail earlier
            # with a message that points at the actual cause.
            raise ValueError(
                f"Found {n_images} image(s) under "
                f"{self.config.images_path / 'main'}, "
                f"but {self.config.folds} folds were requested"
            )

        logger.info(f"Creating {self.config.folds} folds")

        self.folds_info = [
            {
                'fold': fold,
                'train_indices': train_idx.tolist(),
                'val_indices': val_idx.tolist(),
            }
            for fold, (train_idx, val_idx) in enumerate(self.kf.split(self.images_paths))
        ]

        logger.info("KFold created")
        logger.info(f"Saving KFold info at {self.config.fold_file}")

        with open(self.config.fold_file, 'w', encoding='utf-8') as f:
            json.dump(self.folds_info, f, indent=4)

        logger.info("KFold info saved")

            

In [14]:
# Run the K-fold stage end to end: load config, build the splitter,
# collect the image paths, then compute and save the folds.
try:
    config = ConfigurationManager()
    kfold_creator = KFoldCreator(config.get_kfold_config())
    kfold_creator.create_folder()
    kfold_creator.get_images_paths()
    kfold_creator.create_kfolds()
except Exception:
    # Record the failure (with traceback) in the project log, then re-raise
    # it unchanged so the cell still fails visibly.
    logger.exception("KFold creation failed")
    raise

[2024-08-29 21:17:03,484: INFO: common] yaml file: config/config.yaml loaded successfully
[2024-08-29 21:17:03,487: INFO: common] yaml file: params.yaml loaded successfully
[2024-08-29 21:17:03,488: INFO: common] Creating directory: artifacts
[2024-08-29 21:17:03,489: INFO: common] Creating directory: artifacts/kfold
[2024-08-29 21:17:03,490: INFO: 3840771769] Creating KFold folder at artifacts/kfold
[2024-08-29 21:17:03,491: INFO: 3840771769] Getting images paths from datasets/images
[2024-08-29 21:17:03,493: INFO: 3840771769] Creating 5
[2024-08-29 21:17:03,498: INFO: 3840771769] KFold created
[2024-08-29 21:17:03,499: INFO: 3840771769] Saving KFold info at artifacts/kfold/folds_info.json
[2024-08-29 21:17:03,500: INFO: 3840771769] KFold info saved
