In [2]:
import os
import sys
import time
from dataclasses import dataclass
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
from pathlib import Path
import tensorflow as tf
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directory
from cnnClassifier import CustomException

In [2]:
# os.chdir('d:\\codes\\DeepLearning_Proj\\proj1\\research')

In [3]:
# Step one level up from the kernel's current working directory so that the
# relative paths used by later cells resolve from there
# (presumably the project root - TODO confirm against the repo layout).
# NOTE(review): the move is relative, so every re-run of this cell climbs one
# more level; execute it only once per kernel session.
os.chdir('../')
# Show the working directory that is now in effect.
%pwd

'd:\\codes\\DeepLearning_Proj\\proj1\\research'

In [4]:
@dataclass(frozen=True)
class TrainingConfig:
    """Read-only settings handed to the training stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # --- filesystem locations -------------------------------------------
    # Working directory of the training stage.
    root_dir: Path
    # Path associated with the trained model.
    trained_model_path: Path
    # Path of the updated base model that training loads.
    updated_base_model_path: Path
    # Directory containing the image dataset.
    training_data: Path

    # --- hyper-parameters (taken from the params file) --------------------
    # Number of training epochs.
    params_epochs: int
    # Number of images per batch.
    params_batch_size: int
    # Flag for data augmentation.
    params_is_augmentation: bool
    # Image shape; the last entry is sliced off with [:-1] to get target_size.
    params_image_size: list

@dataclass(frozen=True)
class PrepareCallbacksConfig:
    """Read-only settings used to build the training callbacks.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Working directory of the callback-preparation stage.
    root_dir: Path
    # Parent folder under which each run gets its own TensorBoard log directory.
    tensorboard_root_log_dir: Path
    # File path handed to the model-checkpoint callback.
    checkpoint_model_filepath: Path

In [6]:
class ConfigurationManager:
    """Loads the YAML config/params files and builds per-stage config objects.

    The values returned by ``read_yaml`` are accessed with attribute syntax
    below (presumably a ConfigBox-like object - confirm in utils.common).
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: location of the main configuration YAML.
            params_filepath: location of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directory([artifacts_root])

    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        """Create the callback directories and return the callback settings.

        Returns:
            PrepareCallbacksConfig with every path wrapped in ``Path``.
        """
        callbacks_section = self.config.prepare_callbacks

        # Only the folder that will contain the checkpoint file is created,
        # not the checkpoint file itself.
        checkpoint_parent = Path(
            os.path.dirname(callbacks_section.checkpoint_model_filepath)
        )
        tensorboard_dir = Path(callbacks_section.tensorboard_root_log_dir)
        create_directory([checkpoint_parent, tensorboard_dir])

        return PrepareCallbacksConfig(
            root_dir=Path(callbacks_section.root_dir),
            tensorboard_root_log_dir=tensorboard_dir,
            checkpoint_model_filepath=Path(
                callbacks_section.checkpoint_model_filepath
            ),
        )

    def get_training_config(self) -> TrainingConfig:
        """Create the training directory and return the training settings.

        Returns:
            TrainingConfig combining paths from the config file with the
            EPOCHS, BATCH_SIZE, AUGMENTATION and IMAGE_SIZE params.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        # The image dataset lives in a fixed sub-folder of the unzip directory.
        images_dir = os.path.join(
            self.config.data_ingestion.unzip_dir, "Chicken-fecal-images"
        )
        create_directory([training_section.root_dir])

        return TrainingConfig(
            root_dir=Path(training_section.root_dir),
            trained_model_path=Path(training_section.training_model_path),
            updated_base_model_path=Path(base_model_section.update_base_model_path),
            training_data=Path(images_dir),
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
        )


In [None]:
class prepareCallback:
    """Builds the Keras callbacks (TensorBoard + checkpoint) for training."""

    def __init__(self, config : PrepareCallbacksConfig):
        """Keep the callback settings for later use.

        Args:
            config: paths for the TensorBoard logs and the model checkpoint.
        """
        self.config = config

    @property
    def _create_tb_callbacks(self):
        """A new TensorBoard callback logging into a time-stamped sub-folder.

        Being a property, every access builds a fresh callback whose log
        directory name carries the current local time.
        """
        run_stamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        log_root = self.config.tensorboard_root_log_dir
        run_log_dir = os.path.join(log_root, f'tb_log_at_{run_stamp}')
        return tf.keras.callbacks.TensorBoard(log_dir = run_log_dir)

    @property
    def _create_ckpt_callbacks(self):
        """A new ModelCheckpoint callback created with save_best_only=True."""
        checkpoint_options = dict(
            filepath = self.config.checkpoint_model_filepath,
            save_best_only = True,
        )
        return tf.keras.callbacks.ModelCheckpoint(**checkpoint_options)

    def get_tb_ckpt_callbacks(self):
        """Return ``[tensorboard_callback, checkpoint_callback]``, in that order."""
        tensorboard_cb = self._create_tb_callbacks
        checkpoint_cb = self._create_ckpt_callbacks
        return [tensorboard_cb, checkpoint_cb]

ModelCheckPoint -> COMMONLY USED DURING TRAINING TO SAVE THE MODELS WEIGHT AT CERTAIN INTERVALS OR WHEN THE MODEL ACHIVES BETTER PERFORMANCE.

tf.keras.callbacks.ModelCheckpoint(
    
            filepath        = self.config.checkpoint_model_filepath,   -> PATH WHERE THE WEIGHTS OF THE MODEL WILL BE SAVED
            save_best_only  = True                                     -> SAVES ONLY THE BEST MODEL, IF THE MONITORING METRICS DOES 
                                                                          NOT IMPROVE IT WOUNT OVERWRITE THE PREV SAVED CHECKPOINT
                                                  )

In [None]:
class Training:
    """Loads the updated base model and prepares the validation data generator."""

    def __init__(self, config : TrainingConfig):
        """Keep the training settings for later use.

        Args:
            config: paths and hyper-parameters for the training stage.
        """
        self.config = config

    def get_base_model(self):
        """Load the updated base model from disk into ``self.model``."""
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self):
        """Build the validation image generator and store it on the instance.

        Sets ``self.valid_generator`` to a directory iterator that yields the
        validation subset of ``config.training_data``.

        NOTE(review): despite the method name, only the validation generator
        is created here; no training generator is built yet.
        """
        # Options for the ImageDataGenerator itself: scale pixels into [0, 1]
        # and hold out 20% of the images as the validation subset.
        datagenerator_kwargs = dict(
            rescale                = 1.0/255,
            validation_split       = 0.2
        )

        # Options for reading images from disk. [:-1] drops the last entry of
        # params_image_size so target_size receives only the leading dimensions.
        dataflow_kwargs = dict(
            target_size            = self.config.params_image_size[:-1],
            batch_size             = self.config.params_batch_size,
            interpolation          = "bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        # flow_from_directory requires the dataset directory; the previous
        # argument-less call raised TypeError and ignored dataflow_kwargs.
        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory              = str(self.config.training_data),
            subset                 = "validation",
            **dataflow_kwargs
        )

        



def train_valid_generator(self):

    THIS METHOD IS USED IN THE CONTEXT OF DATA PREPROCESSING 

    datagenerator_kwargs = dict(
        
        rescale = used to normalize or scale the pixel values of image (0 <-> 1) or (1 <-> -1),

        1/255 -> (0 <-> 1)range , 2/255 -> (0 <-> 2)range ...... 

        validation_split = defines the fraction of data that will be used for validation. 20% validation and 80% training etc......

        )

        ASSOCIATED WITH THE PREPARATION OF IMAGE DATA FOR TRAINING USING DATA GENERATORS.

        dataflow_kwargs = dict(
            target_size=self.config.params_image_size[:-1], 

            [:-1] this will slice off the channel parameter that is 3 in our case.

            this means that the target image will be of size 224 X 224 we are maintaining the RGB 
            
            THE IMAGE SIZE IS [224,224,3](VGG166) TAKEN FROM THE PARAMS FILE
            
            batch_size=self.config.params_batch_size,

            specifies the number of images that the model processes in each training iteration. THIS WILL AFFERCT BOTH MEMORY AND TRAINING SPEED.

            interpolation = 'bilinear' 

            method determines how the pixel in the image are interpolated or calculated when resizing the image. 

            'bilinear' -> computes new pixel value by considering the weight average of the nearest 2 X 2 pixel to the target location.

            interpolation="bilinear"
        )

    the generator created from flow_from_directory -> is designed specifically to handle a subset of the available image data

    self.valid_generator = valid_datagenerator.flow_from_directory(

        directory -> the chicken fecal images will be passed over here

        subset -> 'validation' the generator will focus on retrieving and processing images intended for validating the performance

    )
