In [1]:
import os

In [2]:
%pwd

'c:\\Users\\satya\\Documents\\data_science_roadmap\\Deep_Learning\\Project\\Kidney-Disease-Classification-MLFlow-DVC\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\satya\\Documents\\data_science_roadmap\\Deep_Learning\\Project\\Kidney-Disease-Classification-MLFlow-DVC'

In [17]:
#entity
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    root_dir: Path
    trained_model_path: Path
    updated_base_model_path: Path
    # base_model_path: Path
    training_data: Path
    params_epochs: int
    params_batch_size: int
    params_is_augmentation: bool
    params_image_size: list
    params_learning_rate: float

In [18]:
#Config manager
from src.cnnClassifier.constants import *
from src.cnnClassifier.utils.common import read_yaml, create_directories
import tensorflow as tf

In [19]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])
        
    def get_training_config(self) -> TrainingConfig:
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        params = self.params
        training_data =os.path.join(self.config.data_ingestion.unzip_dir, "dataset")
        create_directories([
            Path(training.root_dir)
        ])
        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
            # base_model_path=Path(prepare_base_model.base_model_path),
            training_data=Path(training_data),
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE,
            params_is_augmentation=params.AUGMENTATION,
            params_image_size=params.IMAGE_SIZE,
            params_learning_rate=params.LEARNING_RATE
        )

        return training_config

In [20]:
#componenets
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time
tf.config.run_functions_eagerly(True)  # Force eager execution globally


In [None]:
class Training:
    def __init__(self,config: TrainingConfig):
        self.config = config
    
    def get_base_model(self): #To get the updted model
        self.model=tf.keras.models.load_model(
            self.config.updated_base_model_path
            # self.config.base_model_path
        )
        self.model.compile(
            loss=tf.keras.losses.CategoricalCrossentropy(),
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.config.params_learning_rate),
            metrics=["accuracy"]
        )
        
    def train_valid_generator(self): #Function for splitting the train and validation data with preprocessing

        datagenerator_kwargs = dict(
            rescale = 1./255, #Common normslisation step
            validation_split=0.20
        )

        dataflow_kwargs = dict(
            target_size=self.config.params_image_size[:-1], #Resizing the image to 224x224
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"  #A technique used for resizing the image. It uses a weighted average of the 4 nearest pixels.
        )

#preparing the validation data
        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        ) #This will take 10% from normal and 10% from tumor directories roughly

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        ) #Resized image of 224x224 in validation set(20%, 10 % from each class)

#preparing the training data
        if self.config.params_is_augmentation: #If augmentation is kept true it will try to transform the image. Note it won't be adding data to the training set. Just each batch will get randomly transformed as per the arguments passed
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                **datagenerator_kwargs #Rescaling and training 80%.
            )
        else:
            train_datagenerator = valid_datagenerator #If augemnetation is kept false it will not try to transform the image. Just rescaling. 

#Taking 80% of data from normal and tumor directories roughly. Doing data augmentation. Also shuffling to avoid memorization while training
        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
        )
        
        
    @staticmethod #Saving the train model
    def save_model(path: Path, model: tf.keras.Model):
        model.save(path)


#Iteration*batch=data points*epochs

    
    def train(self): #Function to train the model
        self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size #This is number of iteration per epoch. The number of times the model parameters will be updated per epoch.
        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size #After every iteration the model will be validated. 
        #For example if the validation has 100 samples and the batch size is 10 the evaluation will be done for 100/10=10 samples. A record of correct samples is kept. in these 10 samples. then afterwards its added together in the end.

        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )

In [22]:
#pipeline
try:
    config = ConfigurationManager()
    training_config = config.get_training_config()
    training = Training(config=training_config)
    training.get_base_model()
    training.train_valid_generator()
    training.train()

except Exception as e:
    raise e
    

[2025-03-28 23:37:27,631: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-03-28 23:37:27,634: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-28 23:37:27,636: INFO: common: created directory at: artifacts]
[2025-03-28 23:37:27,637: INFO: common: created directory at: artifacts\training]
Found 1056 images belonging to 2 classes.
Found 4231 images belonging to 2 classes.
Epoch 1/5
[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1696s[0m 6s/step - accuracy: 0.6544 - loss: 5.4568 - val_accuracy: 0.7244 - val_loss: 4.0196
Epoch 2/5
[1m  1/264[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m22:54[0m 5s/step - accuracy: 0.8125 - loss: 2.6754



[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 451ms/step - accuracy: 0.8125 - loss: 2.6754 - val_accuracy: 0.6619 - val_loss: 5.5331
Epoch 3/5
[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1545s[0m 6s/step - accuracy: 0.8133 - loss: 2.0742 - val_accuracy: 0.8996 - val_loss: 1.8236
Epoch 4/5
[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 422ms/step - accuracy: 0.9375 - loss: 0.2646 - val_accuracy: 0.8381 - val_loss: 2.1582
Epoch 5/5
[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1473s[0m 6s/step - accuracy: 0.8682 - loss: 1.5353 - val_accuracy: 0.8608 - val_loss: 2.5644
