In [12]:
import os

In [13]:
# Show the kernel's current working directory (IPython line magic).
%pwd

'c:\\Users\\gkart\\Desktop\\1-ProjectENDtoEND\\chicken_disease_classification'

In [3]:
# Step up one directory from the current working directory -- presumably from the
# notebook's folder to the project root so that the `src.*` imports and the relative
# config/artifact paths used below resolve (TODO confirm).
# NOTE: the change is relative to the current cwd, so this cell is not idempotent:
# running it a second time moves up one more level.
os.chdir("../")
%pwd

'c:\\Users\\gkart\\Desktop\\1-ProjectENDtoEND\\chicken_disease_classification'

In [14]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings bundle for the model-training stage.

    It carries the locations of the training data and the base model together
    with the hyper-parameters, i.e. everything the training step needs.

    Attributes:
        root_dir: Directory of the training stage's artifacts.
        trained_model_path: Destination the trained model is saved to.
        updated_base_model_path: Location of the base model that gets loaded
            before training.
        training_data: Directory the image generators read from.
        params_epochs: Number of training epochs.
        params_batch_size: Batch size used when building the data generators.
        params_is_augmentation: Augmentation flag (filled from the
            ``AUGMENTATION`` parameter).
        params_image_size: Image size list; every entry but the last is used
            as the generators' ``target_size``.
    """

    root_dir: Path
    trained_model_path: Path
    updated_base_model_path: Path
    training_data: Path
    params_epochs: int
    params_batch_size: int
    params_is_augmentation: bool
    params_image_size: list

# Note: Since I have not created a separate pipeline for callbacks, I am integrating the callback code here,
# because callbacks are ideally used only during model training.
@dataclass(frozen=True)
class PrepareCallbacksConfig:
    """Immutable settings bundle for the training callbacks.

    Attributes:
        root_dir: Directory of the callback stage's artifacts.
        tensorboard_root_log_dir: Parent directory under which each run's
            TensorBoard log folder is created.
        checkpoint_model_filepath: File path handed to the model-checkpoint
            callback.
    """

    root_dir: Path
    tensorboard_root_log_dir: Path
    checkpoint_model_filepath: Path

In [15]:
# Step - creating entity
from src.chickenDiseaseClassifier.constants import *
from src.chickenDiseaseClassifier.utils.common import read_yaml, create_directories

In [22]:
class ConfigurationManager:
    """Loads the YAML config/params files and builds per-stage config objects.

    The callback configuration is produced here as well, because the callbacks
    are only used while training and have no pipeline stage of their own.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAM_FILE_PATH):
        """Read both YAML files and call ``create_directories`` for the artifacts root.

        Args:
            config_filepath: YAML file with the stage paths/settings.
            params_filepath: YAML file with the training hyper-parameters.
        """
        self.config = read_yaml(config_filepath)
        self.param = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        """Build the callback settings and prepare the directories they write to.

        Returns:
            PrepareCallbacksConfig whose fields are ``Path`` objects, matching
            the dataclass annotations.
        """
        config = self.config.prepare_callbacks

        # The checkpoint setting is a *file* path; only its parent directory
        # has to exist up front, so strip the file name before creating it.
        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)
        create_directories([Path(model_ckpt_dir), Path(config.tensorboard_root_log_dir)])

        return PrepareCallbacksConfig(
            root_dir=Path(config.root_dir),
            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(config.checkpoint_model_filepath),
        )

    def get_training_config(self) -> TrainingConfig:
        """Build the training settings from the config and params files.

        The training data directory is the ``Chicken-fecal-images`` folder
        inside the data-ingestion stage's ``unzip_dir``.

        Returns:
            TrainingConfig whose path fields are ``Path`` objects, matching
            the dataclass annotations.
        """
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        param = self.param

        training_data = os.path.join(self.config.data_ingestion.unzip_dir, "Chicken-fecal-images")
        create_directories([training.root_dir])

        return TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
            training_data=Path(training_data),
            params_epochs=param.EPOCHS,
            params_batch_size=param.BATCH_SIZE,
            params_is_augmentation=param.AUGMENTATION,
            params_image_size=param.IMAGE_SIZE,
        )



In [17]:
# updating components
import time
import tensorflow as tf


class PrepareCallback:
    """Factory for the Keras callbacks that are attached to a training run."""

    def __init__(self, config: PrepareCallbacksConfig):
        """Keep a reference to the callback settings."""
        self.config = config

    @property
    def create_tb_callbacks(self):
        """TensorBoard callback logging into a timestamped sub-directory.

        Exposed as a property, so it is read without call parentheses; every
        access builds a new callback with a freshly computed timestamp.
        """
        timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        run_folder = f"tensorboard_logs_at_{timestamp}"
        run_log_dir = os.path.join(self.config.tensorboard_root_log_dir, run_folder)
        return tf.keras.callbacks.TensorBoard(log_dir=run_log_dir)

    @property
    def create_checkpoint_callbacks(self):
        """ModelCheckpoint callback targeting ``checkpoint_model_filepath``.

        Created with ``save_best_only=True``; like the TensorBoard property,
        every access builds a new callback object.
        """
        return tf.keras.callbacks.ModelCheckpoint(
            self.config.checkpoint_model_filepath,
            save_best_only=True,
        )

    def get_ckpt_tb_callbacks(self):
        """Return ``[tensorboard_callback, checkpoint_callback]`` for ``fit``."""
        tensorboard_cb = self.create_tb_callbacks
        checkpoint_cb = self.create_checkpoint_callbacks
        return [tensorboard_cb, checkpoint_cb]



In [23]:
class Training:
    """Trains the prepared base model on an image directory and saves the result.

    Expected call order: ``get_base_model()`` -> ``train_valid_generator()``
    -> ``train(callbacks)``; ``train`` relies on the attributes set by the
    first two calls.
    """

    def __init__(self, config: TrainingConfig):
        """Keep a reference to the training settings."""
        self.config = config

    def get_base_model(self):
        """Load the model at ``config.updated_base_model_path`` into ``self.model``."""
        self.model = tf.keras.models.load_model(self.config.updated_base_model_path)

    def train_valid_generator(self):
        """Create ``self.validgenerator`` and ``self.traingenerator``.

        Both iterators read from ``config.training_data`` and share the same
        rescaling and ``validation_split=0.2``. The split does not create
        separate folders on disk: the generator assigns 20% of the images to
        the ``"validation"`` subset and the rest to ``"training"`` while
        producing batches.

        When ``config.params_is_augmentation`` is true, the training images
        are additionally augmented on the fly (rotation, flips, shifts, shear,
        zoom); validation images are never augmented.
        """
        datagenerator_kwargs = {"validation_split": 0.2, "rescale": 1.0 / 255}
        dataflow_kwargs = {
            # IMAGE_SIZE's last entry is dropped; the rest is the target size.
            "target_size": self.config.params_image_size[:-1],
            "batch_size": self.config.params_batch_size,
            "interpolation": "bilinear",
        }

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(**datagenerator_kwargs)
        # shuffle must be a real boolean: the string "False" is truthy and
        # would silently shuffle the validation data.
        self.validgenerator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs,
        )

        if self.config.params_is_augmentation:
            # Same split/rescale as validation so both subsets stay disjoint.
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                **datagenerator_kwargs,
            )
        else:
            train_datagenerator = valid_datagenerator

        self.traingenerator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs,
        )

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path``."""
        model.save(path)

    def train(self, callbacks: list):
        """Fit ``self.model`` on the training iterator, then save it.

        Args:
            callbacks: Keras callbacks passed through to ``fit``.
        """
        # No batch_size here: the iterators already yield batches of
        # config.params_batch_size, and fit() must not be given a batch size
        # for generator input.
        self.model.fit(
            self.traingenerator,
            epochs=self.config.params_epochs,
            validation_data=self.validgenerator,
            callbacks=callbacks,
        )
        self.save_model(self.config.trained_model_path, self.model)

In [24]:
# creating pipeline
# Build the configs, create the callbacks, then load the base model, build the
# data generators and train. Any exception propagates unchanged (the former
# `except Exception as e: raise e` wrapper only re-raised it).
configuration_manager = ConfigurationManager()
training_config = configuration_manager.get_training_config()
callback_config = configuration_manager.get_prepare_callback_config()
callback_obj1 = PrepareCallback(callback_config)
callbacks = callback_obj1.get_ckpt_tb_callbacks()
training = Training(training_config)
training.get_base_model()
training.train_valid_generator()
training.train(callbacks)

[2023-08-08 15:51:57,931: INFO: common: yaml file : config\config.yaml loaded sucesfully]
[2023-08-08 15:51:57,946: INFO: common: yaml file : params.yaml loaded sucesfully]
[2023-08-08 15:51:57,951: INFO: common: Create directory at : artifacts]
[2023-08-08 15:51:57,957: INFO: common: Create directory at : artifacts/training]
[2023-08-08 15:51:57,962: INFO: common: Create directory at : artifacts\prepare_callbacks\checkpoint_dir]
[2023-08-08 15:51:57,968: INFO: common: Create directory at : artifacts\prepare_callbacks\tensorboard_log_dir]


Found 78 images belonging to 2 classes.


AttributeError: 'Training' object has no attribute 'training_data'