In [1]:
import os

In [2]:
%pwd

'e:\\Bappy\\Coding\\Youtube\\Office\\End-to-End-Chicken-Disease-Classification-using-Fecal-Image\\research'

In [3]:
# The notebook starts inside the project's "research" folder; step up to the
# project root so relative paths such as the config YAML resolve.
# Guarded on the current folder name so re-running this cell (or launching the
# notebook from the project root) does not climb one level too far.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'e:\\Bappy\\Coding\\Youtube\\Office\\End-to-End-Chicken-Disease-Classification-using-Fecal-Image'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of paths and hyperparameters for the training stage.

    The dataclass is frozen, so attributes cannot be reassigned once an
    instance has been built.
    """

    # --- filesystem locations -------------------------------------------
    root_dir: Path                  # directory created for the training stage
    trained_model_path: Path        # destination of the trained model
    updated_base_model_path: Path   # model loaded as the starting point
    training_data: Path             # image directory read by the generators

    # --- hyperparameters --------------------------------------------------
    params_epochs: int              # epochs passed to fit
    params_batch_size: int          # batch size used by the generators
    params_is_augmentation: bool    # enable augmentation on the training generator
    params_image_size: list         # all entries but the last form the resize target

In [6]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories
import tensorflow as tf

In [11]:
class ConfigurationManager:
    """Reads the config and params YAML files and builds stage configurations."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the config YAML; defaults to
                CONFIG_FILE_PATH.
            params_filepath: Location of the params YAML; defaults to
                PARAMS_FILE_PATH.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Build the TrainingConfig for the training stage.

        Creates the training root directory as a side effect.

        Returns:
            A TrainingConfig populated from the `training`,
            `prepare_base_model` and `data_ingestion` sections of the config
            and from the EPOCHS, BATCH_SIZE, AUGMENTATION and IMAGE_SIZE params.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        # The images live in a fixed sub-folder of the unzipped dataset.
        images_dir = os.path.join(
            self.config.data_ingestion.unzip_dir, "Chicken-fecal-images"
        )

        stage_root = Path(training_section.root_dir)
        create_directories([stage_root])

        return TrainingConfig(
            root_dir=stage_root,
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=Path(images_dir),
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
        )

In [12]:
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time

In [None]:
class Training:
    """Training stage of the pipeline.

    Loads the prepared base model, builds the validation and training image
    generators, fits the model and saves it. Every path and hyperparameter is
    read from the config object given at construction.

    Expected call order: ``get_base_model()``, ``train_valid_generator()``,
    then ``train()`` -- ``train()`` reads ``self.model``,
    ``self.train_generator`` and ``self.valid_generator`` set by the first two.
    """

    # Annotations naming TrainingConfig / tf.keras.Model are written as strings
    # so they are not evaluated when the class body is executed.

    def __init__(self, config: "TrainingConfig"):
        """Keep the configuration so nothing is hardcoded inside the class."""
        self.config = config

    def get_base_model(self):
        """Load the model stored at ``config.updated_base_model_path``.

        The result is kept on ``self.model``; training continues from this
        prepared model instead of starting from scratch.
        """
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self):
        """Create ``self.valid_generator`` and ``self.train_generator``.

        Both generators read batches of images from ``config.training_data``,
        rescale pixel values by 1/255 and share the same 80/20
        training/validation split. Only the training generator is augmented,
        and only when ``config.params_is_augmentation`` is true.
        """
        # Preprocessing shared by both generators.
        datagenerator_kwargs = dict(
            rescale=1. / 255,
            validation_split=0.20,  # 20% of the images are held out for validation
        )

        # Image loading settings shared by both generators.
        dataflow_kwargs = dict(
            # Every entry of the configured image size except the last one.
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear",
        )

        # Validation images are never augmented so evaluation sees the data
        # as-is; order is fixed (shuffle=False) for consistent evaluation.
        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )
        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            # Random geometric transformations applied to training images only.
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                **datagenerator_kwargs
            )
        else:
            # No augmentation: reuse the validation preprocessing unchanged.
            train_datagenerator = valid_datagenerator

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
        )

    @staticmethod
    def save_model(path: Path, model: "tf.keras.Model"):
        """Save ``model`` to ``path`` by calling ``model.save(path)``."""
        model.save(path)

    @staticmethod
    def _steps_for(generator) -> int:
        """Return how many batches to draw from ``generator`` per pass.

        This is the number of full batches (``samples // batch_size``), except
        that a generator holding some samples but fewer than one batch yields
        1 instead of 0. Plain floor division would otherwise ask ``fit`` to
        run zero steps for a small split.
        """
        full_batches = generator.samples // generator.batch_size
        if full_batches == 0 and generator.samples > 0:
            return 1
        return full_batches

    def train(self):
        """Fit ``self.model`` on the generators and save the trained model.

        Sets ``self.steps_per_epoch`` and ``self.validation_steps``, runs
        ``fit`` for ``config.params_epochs`` epochs with the validation
        generator as validation data, then saves the model to
        ``config.trained_model_path``.
        """
        self.steps_per_epoch = self._steps_for(self.train_generator)
        self.validation_steps = self._steps_for(self.valid_generator)

        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )


In [14]:
# Run the training stage end to end: build the config, load the base model,
# create the data generators, then train and save the model.
try:
    config = ConfigurationManager()
    training_config = config.get_training_config()
    training = Training(config=training_config)
    training.get_base_model()
    training.train_valid_generator()
    training.train()

except Exception:
    # Nothing is handled here; re-raise the active exception as-is.
    raise

[2024-10-17 13:14:29,687: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-10-17 13:14:29,688: INFO: common: yaml file: params.yaml loaded successfully]
[2024-10-17 13:14:29,689: INFO: common: created directory at: artifacts]
[2024-10-17 13:14:29,689: INFO: common: created directory at: artifacts\training]
Found 78 images belonging to 2 classes.
Found 312 images belonging to 2 classes.
