In [1]:
import os

In [2]:
%pwd

'c:\\Users\\satya\\Documents\\data_science_roadmap\\Deep_Learning\\Project\\Kidney-Disease-Classification-MLFlow-DVC\\research'

In [3]:
# Step up to the project root only while still inside research/, so that
# re-running this cell does not keep climbing the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\satya\\Documents\\data_science_roadmap\\Deep_Learning\\Project\\Kidney-Disease-Classification-MLFlow-DVC'

In [None]:
#entity
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of paths and hyperparameters for the training stage.

    Instances are built by ``ConfigurationManager.get_training_config``: the
    path fields come from the YAML config and the ``params_*`` fields from
    the params file.
    """

    # --- filesystem locations ---
    root_dir: Path  # training artifacts directory; created when the config is built
    trained_model_path: Path  # destination the fitted model is saved to
    updated_base_model_path: Path  # model file loaded as the starting point for training
    training_data: Path  # "train" subdirectory of the data-transformation split directory
    validation_data: Path  # "test" subdirectory of the data-transformation split directory

    # --- hyperparameters (names in capitals are the keys read from the params file) ---
    params_epochs: int  # EPOCHS: passed to model.fit as the epoch count
    params_batch_size: int  # BATCH_SIZE: batch size of both directory iterators
    params_is_augmentation: bool  # AUGMENTATION: whether training-time augmentation is requested
    params_image_size: list  # IMAGE_SIZE: all but the last entry is used as the resize target (presumably [H, W, C] — confirm)
    params_learning_rate: float  # LEARNING_RATE: learning rate given to the Adam optimizer
    params_monitor: str  # MONITOR: metric name watched by EarlyStopping
    params_patience: int  # PATIENCE: patience given to EarlyStopping

In [6]:
#Config manager
from src.cnnClassifier.constants import *
from src.cnnClassifier.utils.common import read_yaml, create_directories
import tensorflow as tf

In [None]:
class ConfigurationManager:
    """Reads the YAML config and params files and builds stage configurations."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Load both YAML files and ensure the artifacts root directory exists.

        Args:
            config_filepath: Location of the main config YAML.
            params_filepath: Location of the hyperparameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the ``TrainingConfig`` for the training stage.

        Creates the training root directory as a side effect.

        Returns:
            A ``TrainingConfig`` whose paths come from the config file and
            whose ``params_*`` values come from the params file.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        # The split directory is expected to hold "train" and "test" subfolders.
        split_dir = self.config.data_transformation.split
        train_dir = os.path.join(split_dir, "train")
        valid_dir = os.path.join(split_dir, "test")

        create_directories([Path(training_section.root_dir)])

        return TrainingConfig(
            root_dir=Path(training_section.root_dir),
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=Path(train_dir),
            validation_data=Path(valid_dir),
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
            params_learning_rate=hyperparams.LEARNING_RATE,
            params_monitor=hyperparams.MONITOR,
            params_patience=hyperparams.PATIENCE,
        )


In [None]:
#components
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time
# Force eager execution globally: tf.function-decorated code then runs eagerly instead of as a traced graph.
# NOTE(review): this is usually a debugging aid and may slow training noticeably — confirm it is still needed.
tf.config.run_functions_eagerly(True)
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping


In [None]:

class Training:
    """Trains the prepared base model on the train/validation image folders.

    Intended call order: ``get_base_model()``, ``train_valid_generator()``,
    then ``train()``. ``train()`` relies on the attributes the first two set.
    """

    def __init__(self, config: TrainingConfig):
        """Store the training configuration; no model or data is loaded here."""
        self.config = config

    def get_base_model(self):
        """Load the updated base model and recompile it for training.

        Sets ``self.model``. The model is compiled with categorical
        cross-entropy, Adam at the configured learning rate, and accuracy as
        the reported metric.
        """
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )
        self.model.compile(
            loss=tf.keras.losses.CategoricalCrossentropy(),
            optimizer=tf.keras.optimizers.Adam(
                learning_rate=self.config.params_learning_rate
            ),
            metrics=["accuracy"]
        )

    def train_valid_generator(self):
        """Build the directory iterators for validation and training.

        Sets ``self.valid_generator`` and ``self.train_generator``. Both apply
        the VGG16 ``preprocess_input`` function to every image. Random
        augmentation is applied to the training iterator only, and only when
        ``config.params_is_augmentation`` is true.
        """
        # Shared by both generators: VGG16 preprocessing instead of a plain 1/255 rescale.
        datagenerator_kwargs = dict(
            preprocessing_function=preprocess_input
        )

        dataflow_kwargs = dict(
            # All but the last entry of IMAGE_SIZE is the resize target
            # (presumably height and width, with channels last — confirm).
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        # Validation data: read from its own directory, preprocessing only,
        # and not shuffled so the sample order is stable between runs.
        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.validation_data,
            shuffle=False,
            **dataflow_kwargs
        )

        # Training data: honour the AUGMENTATION flag from the params file.
        # Augmentation transforms each batch randomly on the fly; it does not
        # add images to the training set.
        if self.config.params_is_augmentation:
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=30,
                vertical_flip=True,
                horizontal_flip=True,
                width_shift_range=0.1,
                height_shift_range=0.1,
                shear_range=0.1,
                zoom_range=0.3,
                brightness_range=[0.3, 1.7],
                **datagenerator_kwargs
            )
        else:
            # No augmentation requested: reuse the preprocessing-only generator.
            train_datagenerator = valid_datagenerator

        # Shuffled so the model does not see the samples in directory order.
        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            shuffle=True,
            **dataflow_kwargs
        )

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path`` using the model's own ``save`` method."""
        model.save(path)

    def train(self):
        """Fit the model with early stopping, then save it.

        ``steps_per_epoch`` and ``validation_steps`` are not passed, so Keras
        takes the number of batches per epoch from the generators themselves.
        The model is written to ``config.trained_model_path`` after fitting.

        Raises:
            AttributeError: If ``get_base_model()`` or
                ``train_valid_generator()`` has not been called first.
        """
        # Fail early with an actionable message instead of a bare attribute error.
        missing = [
            name
            for name in ("model", "train_generator", "valid_generator")
            if not hasattr(self, name)
        ]
        if missing:
            raise AttributeError(
                "Training.train() called before setup; missing attribute(s): "
                + ", ".join(missing)
                + ". Call get_base_model() and train_valid_generator() first."
            )

        # Stop when the monitored metric has not improved for `patience` epochs.
        early_stopping = EarlyStopping(
            patience=self.config.params_patience,
            monitor=self.config.params_monitor
        )
        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            validation_data=self.valid_generator,
            callbacks=[early_stopping]
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )

In [10]:
#pipeline
# Run the training stage end to end. Exceptions propagate unchanged, so the
# traceback points straight at the failing call without an extra
# catch-and-re-raise frame.
config = ConfigurationManager()
training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()  # load and compile the updated base model
training.train_valid_generator()  # build the validation and training iterators
training.train()  # fit with early stopping, then save the model
    

[2025-04-02 10:21:12,877: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-04-02 10:21:12,880: INFO: common: yaml file: params.yaml loaded successfully]
[2025-04-02 10:21:12,881: INFO: common: created directory at: artifacts]
[2025-04-02 10:21:12,882: INFO: common: created directory at: artifacts\training]
Found 1056 images belonging to 2 classes.
Found 4231 images belonging to 2 classes.


  self._warn_if_super_not_called()


[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1704s[0m 6s/step - accuracy: 0.6792 - loss: 5.0359 - val_accuracy: 0.8826 - val_loss: 2.4238
