In [1]:
import os
os.chdir("/workspaces/Howler")

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    root_dir: Path
    n_assistant_images: int
    fold_file: Path
    images_path: Path
    labels_path: Path


@dataclass(frozen=True)
class TrainingConfig:
    root_dir: Path
    model_path: str
    data_file_path: Path
    params_epochs: int
    params_batch_size: int
    model_name: str

In [3]:
from HowlerMonkey.constants import *
from HowlerMonkey.utils.common import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH
    ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])


    def get_data_ingestion_config(self) -> DataIngestionConfig:

        config = self.config.data_ingestion
        kfold_config = self.config.kfold

        create_directories([config.root_dir])
        
        data_ingestion_config = DataIngestionConfig(
            root_dir        = Path(config.root_dir),
            n_assistant_images = config.n_assistant_images,
            fold_file= Path(kfold_config.root_dir) / kfold_config.fold_file,
            images_path = Path(kfold_config.images_path),
            labels_path= Path(kfold_config.labels_path),
        )
        
        
        return data_ingestion_config
    
    def get_training_config(self):

        training = self.config.training

        create_directories([
            Path(training.root_dir)
        ])

        training_config = TrainingConfig(
            root_dir = Path(training.root_dir),
            model_path = training.model_path,
            data_file_path = Path(training.data_file_path),
            params_epochs = self.params.EPOCHS,
            params_batch_size = self.params.BATCH_SIZE,
            model_name= training.model_name
        )
        
        return training_config


In [5]:
import json
import shutil
import glob
import random

from HowlerMonkey.utils.common import copy_images, clear_and_create_folder
from HowlerMonkey import logger

In [6]:
class DataIngestion:

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def load_fold_file(self):

        logger.info(f"Loading fold file")
        with open(self.config.fold_file, 'r') as f:
            self.folds_info = json.load(f)
        logger.info(f"Fold file loaded")

    def clear_folders(self):
        
        clear_and_create_folder(
            self.config.images_path / 'val', 
            self.config.labels_path / 'val'
        )

        clear_and_create_folder(
            self.config.images_path / 'train', 
            self.config.labels_path / 'train'
        )
        
    def get_images_paths(self):
        logger.info(f"Getting images paths")
        self.images_paths = glob.glob(str(self.config.images_path / 'main' / '*.jpg'))

    def get_assistant_images_paths(self):
        logger.info(f"Getting assistant images paths")
        self.assistant_images_paths = glob.glob(str(self.config.images_path / 'assistant' / '*.jpg'))

    def merge_assistant_data(self):

        self.get_assistant_images_paths()



        logger.info(f"Select {self.config.n_assistant_images} assistant images")
        
        self.assistant_images_paths = random.sample(
            self.assistant_images_paths, 
            self.config.n_assistant_images
        )

        
        copy_images(
            self.assistant_images_paths,
            self.config.labels_path / "assistant", 
            self.config.images_path / "train", 
            self.config.labels_path / "train"
        )



    def split_data(self, fold_id: int):

        self.get_images_paths()

        logger.info(f"Splitting data for fold {fold_id}")

        self.fold_data = next(
            fold for fold in self.folds_info 
                if fold['fold'] == fold_id
        )

        self.train_idx = self.fold_data['train_indices']
        train_images = [self.images_paths[i] for i in self.train_idx]

        copy_images(
            train_images,
            self.config.labels_path / "main", 
            self.config.images_path / "train", 
            self.config.labels_path / "train"
        )



        self.val_idx = self.fold_data['val_indices']
        val_images = [self.images_paths[i] for i in self.val_idx]

        logger.info(f"Copying images to val folder")

        copy_images(
            val_images,
            self.config.labels_path / "main", 
            self.config.images_path / "val", 
            self.config.labels_path / "val"
        )




In [7]:
from ultralytics import YOLO

In [8]:
class Training:
    def __init__(self, config: TrainingConfig):
        self.config = config

    
    def get_model(self):
        logger.info(f"Loading model")
        self.model = YOLO(self.config.model_path)

    def train(self):
        
        logger.info(f"Training model")
        self.model.train(
            data=self.config.data_file_path,
            epochs=self.config.params_epochs,
            batch=self.config.params_batch_size,
            project=self.config.root_dir,
            name=self.config.model_name
        )

        logger.info(f"Model trained")

In [9]:
try:
    config = ConfigurationManager()
    data_ingestor = DataIngestion(config.get_data_ingestion_config())
    data_ingestor.load_fold_file()

    trainer = Training(config.get_training_config())
    trainer.get_model()

    for i in range(config.config.kfold.folds):

        data_ingestor.clear_folders()

        data_ingestor.split_data(fold_id=i)
        data_ingestor.merge_assistant_data()

        for _ in range(10):
            trainer.get_model()
            trainer.train()

except Exception as e:
    raise e

[2024-08-29 21:22:12,276: INFO: common] yaml file: config/config.yaml loaded successfully
[2024-08-29 21:22:12,279: INFO: common] yaml file: params.yaml loaded successfully
[2024-08-29 21:22:12,282: INFO: common] Creating directory: artifacts
[2024-08-29 21:22:12,283: INFO: common] Creating directory: artifacts/data_ingestion
[2024-08-29 21:22:12,284: INFO: 44110703] Loading fold file
[2024-08-29 21:22:12,286: INFO: 44110703] Fold file loaded
[2024-08-29 21:22:12,287: INFO: common] Creating directory: artifacts/training
[2024-08-29 21:22:12,288: INFO: 1000564568] Loading model
[2024-08-29 21:22:12,386: INFO: common] Clearing val folder
[2024-08-29 21:22:12,392: INFO: common] Creating val folder
[2024-08-29 21:22:12,393: INFO: common] Clearing train folder
[2024-08-29 21:22:12,398: INFO: common] Creating train folder
[2024-08-29 21:22:12,401: INFO: 44110703] Getting images paths
[2024-08-29 21:22:12,404: INFO: 44110703] Splitting data for fold 0
[2024-08-29 21:22:12,406: INFO: common] C