In [1]:
import os
import sys
import time
from dataclasses import dataclass
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
from pathlib import Path
import tensorflow as tf
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directory
from cnnClassifier import CustomException

In [2]:
# os.chdir('d:\\codes\\DeepLearning_Proj\\proj1\\research')

In [2]:
os.chdir('../')
%pwd

'd:\\codes\\DeepLearning_Proj\\proj1'

In [8]:
@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings consumed by the training stage.

    Groups the filesystem locations and the hyper-parameters that the
    training code reads. Instances are frozen: attributes cannot be
    reassigned after construction.
    """

    # --- filesystem locations ---
    root_dir: Path                 # directory created for the training stage
    trained_model_path: Path       # destination the trained model is saved to
    updated_base_model_path: Path  # model file loaded as the starting point
    training_data: Path            # directory the image generators read from

    # --- hyper-parameters ---
    params_epochs: int             # number of epochs passed to fit()
    params_batch_size: int         # batch size used by both generators
    params_is_augmentation: bool   # whether the training generator augments images
    params_image_size: list        # image size; the last entry is dropped to get target_size

@dataclass(frozen=True)
class PrepareCallbacksConfig:
    """Immutable settings for building the training callbacks.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    root_dir: Path                   # root directory of the callbacks stage
    tensorboard_root_log_dir: Path   # parent directory for per-run TensorBoard logs
    checkpoint_model_filepath: Path  # file path of the model checkpoint

In [56]:
class ConfigurationManager:
    """Loads the project YAML files and builds per-stage config objects.

    The constructor reads the config and params files and creates the
    artifacts root directory. Each ``get_*_config`` method creates the
    directories its stage needs and returns a frozen config dataclass.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root.

        Args:
            config_filepath: path of the YAML file holding the stage settings.
            params_filepath: path of the YAML file holding the hyper-parameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directory([artifacts_root])

    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        """Create the callback directories and return the callbacks config.

        Returns:
            PrepareCallbacksConfig built from the ``prepare_callbacks`` section.
        """
        callbacks_section = self.config.prepare_callbacks

        # Only the parent folder of the checkpoint file is created here;
        # the checkpoint file itself is written later during training.
        checkpoint_dir = Path(os.path.dirname(callbacks_section.checkpoint_model_filepath))
        tensorboard_dir = Path(callbacks_section.tensorboard_root_log_dir)
        create_directory([checkpoint_dir, tensorboard_dir])

        return PrepareCallbacksConfig(
            root_dir=Path(callbacks_section.root_dir),
            tensorboard_root_log_dir=Path(callbacks_section.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(callbacks_section.checkpoint_model_filepath),
        )

    def get_training_config(self) -> TrainingConfig:
        """Create the training directory and return the training config.

        Returns:
            TrainingConfig combining the ``training`` and ``prepare_base_model``
            sections with the hyper-parameters from the params file.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        # The image folder lives inside the directory the data-ingestion stage unzips into.
        dataset_dir = os.path.join(
            self.config.data_ingestion.unzip_dir,
            "Chicken-fecal-images",
        )
        create_directory([Path(training_section.root_dir)])

        return TrainingConfig(
            root_dir=Path(training_section.root_dir),
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.update_base_model_path),
            training_data=Path(dataset_dir),
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
        )

In [57]:
class PrepareCallback:
    """Builds the Keras callbacks (TensorBoard and model checkpoint) for training."""

    def __init__(self, config : PrepareCallbacksConfig):
        """Store the callbacks configuration.

        Args:
            config: provides ``tensorboard_root_log_dir`` and
                ``checkpoint_model_filepath``.
        """
        self.config = config

    @property
    def _create_tb_callbacks(self):
        """Return a new TensorBoard callback logging to a timestamped sub-directory.

        A fresh callback (with a fresh timestamp) is built on every access.
        """
        timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        tb_running_log_dir = os.path.join(
            self.config.tensorboard_root_log_dir,
            f'tb_log_at_{timestamp}'
        )
        return tf.keras.callbacks.TensorBoard(log_dir = tb_running_log_dir)

    @property
    def _create_ckpt_callbacks(self):
        """Return a new ModelCheckpoint callback writing to the configured path.

        The path comes from the configuration rather than a hard-coded,
        backslash-separated literal, so it matches the directory created for
        it and works on non-Windows systems as well.
        """
        return tf.keras.callbacks.ModelCheckpoint(
            # str() keeps this working on Keras versions that expect a plain string path.
            filepath       = str(self.config.checkpoint_model_filepath),
            save_best_only = True
        )

    def get_tb_ckpt_callbacks(self):
        """Return ``[tensorboard_callback, checkpoint_callback]`` for ``fit``."""
        return [
            self._create_tb_callbacks,
            self._create_ckpt_callbacks
        ]

ModelCheckpoint -> COMMONLY USED DURING TRAINING TO SAVE THE MODEL'S WEIGHTS AT CERTAIN INTERVALS OR WHEN THE MODEL ACHIEVES BETTER PERFORMANCE.

tf.keras.callbacks.ModelCheckpoint(
    
            filepath        = self.config.checkpoint_model_filepath,   -> PATH WHERE THE WEIGHTS OF THE MODEL WILL BE SAVED
            save_best_only  = True                                     -> SAVES ONLY THE BEST MODEL; IF THE MONITORED METRIC DOES 
                                                                          NOT IMPROVE, IT WON'T OVERWRITE THE PREVIOUSLY SAVED CHECKPOINT
                                                  )

In [66]:
class Training:
    """Loads the updated base model, builds the image generators and trains.

    Expected call order: ``get_base_model`` -> ``train_valid_generator`` -> ``train``.
    """

    def __init__(self, config : TrainingConfig):
        """Keep the training configuration for the later steps."""
        self.config = config

    def get_base_model(self):
        """Load the updated base model from disk into ``self.model``."""
        model_path = self.config.updated_base_model_path
        self.model = tf.keras.models.load_model(model_path)

    def train_valid_generator(self):
        """Create ``self.valid_generator`` and ``self.train_generator``.

        Both read from ``config.training_data`` with a 20% validation split.
        The training generator adds random augmentation only when
        ``config.params_is_augmentation`` is truthy.
        """
        # Options shared by every ImageDataGenerator: pixel scaling and the split.
        preprocessing = {
            "rescale": 1.0/255,
            "validation_split": 0.20,
        }
        # Options shared by every flow_from_directory call.
        flow_options = {
            "target_size": self.config.params_image_size[:-1],  # drop the last (channel) entry
            "batch_size": self.config.params_batch_size,
            "interpolation": "bilinear",
        }
        make_datagen = tf.keras.preprocessing.image.ImageDataGenerator

        # Validation images are only rescaled and are read in a fixed order.
        plain_datagen = make_datagen(**preprocessing)
        self.valid_generator = plain_datagen.flow_from_directory(
            directory=self.config.training_data,
            subset='validation',
            shuffle=False,
            **flow_options
        )

        # Default to the plain generator; replace it with an augmenting one when enabled.
        train_datagen = plain_datagen
        if self.config.params_is_augmentation:
            train_datagen = make_datagen(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                **preprocessing
            )

        self.train_generator = train_datagen.flow_from_directory(
            directory=self.config.training_data,
            subset='training',
            shuffle=True,
            **flow_options
        )

    @staticmethod
    def save_model(path : Path, model: tf.keras.Model):
        """Save ``model`` to ``path``."""
        model.save(path)

    def train(self, callback_list: list):
        """Fit the model on the generators, then save it to ``config.trained_model_path``.

        Args:
            callback_list: Keras callbacks forwarded to ``fit``.
        """
        # Whole batches only: a trailing partial batch is not counted as a step.
        train_flow = self.train_generator
        self.steps_per_epoch = train_flow.samples // train_flow.batch_size
        valid_flow = self.valid_generator
        self.validation_steps = valid_flow.samples // valid_flow.batch_size

        self.model.fit(
            train_flow,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=valid_flow,
            callbacks=callback_list
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )

def train_valid_generator(self):

    THIS METHOD IS USED IN THE CONTEXT OF DATA PREPROCESSING 

    datagenerator_kwargs = dict(
        
        rescale = used to normalize or scale the pixel values of image (0 <-> 1) or (1 <-> -1),

        1/255 -> (0 <-> 1)range , 2/255 -> (0 <-> 2)range ...... 

        validation_split = defines the fraction of data that will be used for validation. 20% validation and 80% training etc......

        )

        ASSOCIATED WITH THE PREPARATION OF IMAGE DATA FOR TRAINING USING DATA GENERATORS.

        dataflow_kwargs = dict(
            target_size=self.config.params_image_size[:-1], 

            [:-1] this will slice off the channel parameter that is 3 in our case.

            this means that the target image will be of size 224 X 224, and we keep the RGB channels.
            
            THE IMAGE SIZE IS [224,224,3] (VGG16), TAKEN FROM THE PARAMS FILE
            
            batch_size=self.config.params_batch_size,

            specifies the number of images that the model processes in each training iteration. THIS WILL AFFECT BOTH MEMORY AND TRAINING SPEED.

            interpolation = 'bilinear' 

            this method determines how the pixels in the image are interpolated or calculated when resizing the image. 

            'bilinear' -> computes the new pixel value by taking the weighted average of the nearest 2 X 2 pixels to the target location.

            interpolation="bilinear"
        )

    GENERATOR -> usually refers to an instance of 'ImageDataGenerator' or similar data generator used to process and load data for training or validation task.

    IMAGEDATAGENERATOR -> handles the loading and processing of large datasets, especially images; it performs real-time data augmentation, normalization and resizing

    the generator created from flow_from_directory -> is designed specifically to handle a subset of the available image data

######## -----------------------------------------------------------------------------

    .flow_from_directory -> employed for generating a data iterator or generator that reads and processes images directly from a specific directory structure.

    DIRECTORY STRUCTURE -> assumes a directory structure where the subdirectory corresponds to different classes, each subdirectory contains images belonging to a respective class.

    DATA LOADER -> Loads and processes image data from the provided directory, utilizing the structure to automatically assign labels based on the subdirectory name

####### ------------------------------------------------------------------------------

    self.valid_generator = valid_datagenerator.flow_from_directory(

        directory -> the chicken fecal images will be passed over here

        subset -> 'validation' configures the generator to focus on fetching and processing images exclusively reserved for evaluating the model's performance. This ensures that the model is tested on independent, unseen data.

        shuffle -> False means that the order of validation images will not be shuffled; they will be processed in the order they are found in.

        **dataflow_kwargs -> the above dictionary will be unpacked here.
    )

    # in the params file we have set the augmentation to true, hence the augmenting data generator will be created.

    if self.config.params_is_augmentation:
        train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            rotation_range = Specifies the range within which the images can be randomly rotated during training, [40 then (-40 <-> +40) degrees]

            horizontal_flip = Enables or disables the flipping of image randomly during training,

            width_shift_range = allow shifting the width of images randomly during training,

            height_shift_range = allow shifting the height of images randomly during training,

            shear_range = range (tilting/slanting) of images during training,

            zoom_range = defines the range for random zooming in or out of images during training,

            The datagenerator_kwargs dictionary will be unpacked here.

            **datagenerator_kwargs
        )

    def train(self, callback_list: list):

        # calculates the number of steps that will make up one epoch during the training process
        self.steps_per_epoch = total number of samples in the training dataset // size of each batch during training

        number of images = 200 
        batch size       = 10 
        steps_per_epoch = 20, meaning the model will update its weights 20 times per epoch by iterating through 20 batches.
        Setting the number of epochs helps in determining when to stop the training process.

        self.steps_per_epoch = 200/10 = 20, it specifies the number of times the model will update its weight during one epoch by iterating through the batches. 

        # determines the number of steps(batches) for validation
        self.validation_steps = total number of samples in the validation dataset // size of each batch used for validation.

In [71]:
# Run the training stage end to end: load the configuration, build the
# callbacks, load the base model, create the data generators and train.
try:
    config = ConfigurationManager()
    prepare_callbacks_config = config.get_prepare_callback_config()
    prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
    callback_list = prepare_callbacks.get_tb_ckpt_callbacks()
    training_config = config.get_training_config()
    training = Training(config=training_config)
    training.get_base_model()
    training.train_valid_generator()
    training.train(
        callback_list=callback_list
    )

except Exception as e:
    # Raise the wrapped error: constructing CustomException without raising it
    # discarded the failure and made a broken run look like a successful one.
    raise CustomException(e, sys) from e

[2023-11-03 03:44:27,221] 31 root - INFO - YAML FILE config\config.yaml LOADED SUCCESSFULLY
[2023-11-03 03:44:27,226] 31 root - INFO - YAML FILE params.yaml LOADED SUCCESSFULLY
[2023-11-03 03:44:27,227] 50 root - INFO - CREATED DIRECTORY AT : artifacts
[2023-11-03 03:44:27,228] 50 root - INFO - CREATED DIRECTORY AT : artifacts\prepare_callbacks\checkpoint_dir
[2023-11-03 03:44:27,228] 50 root - INFO - CREATED DIRECTORY AT : artifacts\prepare_callbacks\tensorflow_log_dir
[2023-11-03 03:44:27,231] 50 root - INFO - CREATED DIRECTORY AT : artifacts\training
Found 78 images belonging to 2 classes.
Found 312 images belonging to 2 classes.
Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
