In [1]:
import os 

In [2]:
%pwd

'/home/ahmed/project/Kidney-Disease-Classification-Deep-learning-project/recsearch'

In [3]:
import sys
import os

In [4]:
os.chdir('../')

In [5]:
%pwd

'/home/ahmed/project/Kidney-Disease-Classification-Deep-learning-project'

In [6]:
from dataclasses import dataclass 
from pathlib import Path 

@dataclass(frozen=True)
class TrainingConfig:
    root_dir: Path
    trained_model_path: Path
    update_base_model: Path 
    training_data: Path
    param_image_size: list
    param_batch_size: int
    param_epochs: int 
    params_augmentation: bool # import in param.yaml
    param_learning_rate: float

In [7]:
from project.constants import *
from project.utils import create_directories,read_yaml

In [8]:
class ConfigerationManager:
    def __init__(self, config=CONFIG_YAML_FILE, param=PARAM_YAML_FILE):
        self.config = read_yaml(config)
        self.param = read_yaml(param)
       
        create_directories(self.config.artifacts_root)
    
    def get_training_config(self):
        training = self.config.training 
        prepare_base_model = self.config.prepare_base_model
        trainig_data = os.path.join(self.config.data_ingestion.unzip_dir,"kidney-ct-scan-image")
        create_directories(training.root_dir)

        training_config = TrainingConfig(
            root_dir= training.root_dir, 
            trained_model_path=training.trained_model_path, 
            update_base_model=prepare_base_model.update_base_model, 
            training_data=trainig_data, 
            param_image_size=self.param.IMAGE_SIZE, 
            param_batch_size=self.param.BATCH_SIZE, 
            param_epochs=self.param.EPOCHS, 
            params_augmentation = self.param.AUGMENTATION,
            param_learning_rate= self.param.LEARNING_RATE
        )
        return training_config



In [9]:
import math
from pathlib import Path
import tensorflow as tf

2025-11-25 15:15:46.647802: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-25 15:15:46.820503: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-25 15:15:50.375666: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


In [10]:



class Training:
    def __init__(self, config:TrainingConfig):
        """
        Args:
            config: Instance of TrainingConfig containing all training parameters.
        """
        self.config = config
        

    def get_base_model(self):
        """Load and compile the base model."""
        self.model = tf.keras.models.load_model(self.config.update_base_model)
        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.config.param_learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=["accuracy"]
        )




    def train_valid_generator(self):
        """Create training and validation datasets using tf.data and modern augmentation."""

        # Image size: (H, W)
        img_size = tuple(self.config.param_image_size[:-1])
        batch_size = self.config.param_batch_size

        
        train_ds = tf.keras.utils.image_dataset_from_directory(
            self.config.training_data,
            validation_split=0.2,
            subset="training",
            seed=42,
            image_size=img_size,
            batch_size=batch_size,
            shuffle=True
        )

        
        val_ds = tf.keras.utils.image_dataset_from_directory(
            self.config.training_data,
            validation_split=0.2,
            subset="validation",
            seed=42,
            image_size=img_size,
            batch_size=batch_size,
            shuffle=False
        )

        
        # Normalization Layer (replaces rescale=1./255)
        normalization_layer = tf.keras.layers.Rescaling(1./255)

        train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
        val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))


        # Data Augmentation (modern version)       
        if self.config.params_augmentation:
            data_augmentation = tf.keras.Sequential([
                tf.keras.layers.RandomRotation(0.1),
                tf.keras.layers.RandomTranslation(0.2, 0.2),
                tf.keras.layers.RandomZoom(0.2),
                tf.keras.layers.RandomFlip("horizontal")
            ])

            train_ds = train_ds.map(lambda x, y: (data_augmentation(x, training=True), y))

        # ------------------------------
        # Enable Prefetching for Performance
        # ------------------------------
        self.train_data = train_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
        self.val_data = val_ds.prefetch(buffer_size=tf.data.AUTOTUNE)



    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save the trained model to the given path."""
        model.save(str(path))

    def train(self, callbacks: list = None):
        """
        Train the model using the training and validation generators.

        Args:
            callbacks (list, optional): Keras callbacks for training.
        """
        # Calculate steps to cover all samples
        steps_per_epoch = math.ceil(self.train_data.samples / self.train_data.batch_size)
        validation_steps = math.ceil(self.val_data.samples / self.val_data.batch_size)

        # Train the model
        history = self.model.fit(
            self.train_data,
            epochs=self.config.param_epochs,
            steps_per_epoch=steps_per_epoch,
            validation_data=self.val_data,
            validation_steps=validation_steps,
            callbacks=callbacks,
            verbose=1
        )

        # Save the trained model
        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )

        return history



In [11]:
from project.components.model_training import Training 
from project.components.callbacks import CallBacks 
from project.configeration import ConfigerationManager

In [12]:
if __name__ == '__main__':
    
    # Initialize configuration manager
    config = ConfigerationManager()  

    # Prepare callbacks (optional, uncomment if needed)
    callbacks_config = config.get_prepare_callback_config()
    callback_list = CallBacks(config=callbacks_config).get_tb_ckpt_callbacks()

    # Training setup
    training_config = config.get_training_config()
    trainer = Training(config=training_config)  # fixed spelling from 'Traning'

    # Prepare model and data
    trainer.get_base_model()
    trainer.train_valid_generator()
    trainer.train()

    # Train the model (uncomment when callbacks are ready)
    trainer.train(callbacks=callback_list)




2025-11-25 15:15:53.333978: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Found 93 images belonging to 2 classes.
Found 372 images belonging to 2 classes.
Epoch 1/2


2025-11-25 15:15:58.171032: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 205520896 exceeds 10% of free system memory.
2025-11-25 15:15:58.322244: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 205520896 exceeds 10% of free system memory.
2025-11-25 15:15:58.895456: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 102760448 exceeds 10% of free system memory.
2025-11-25 15:15:59.148247: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 102760448 exceeds 10% of free system memory.


[1m 2/24[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m23s[0m 1s/step - accuracy: 0.6937 - loss: 0.8537 

2025-11-25 15:16:03.146580: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 205520896 exceeds 10% of free system memory.


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 4s/step - accuracy: 0.7823 - loss: 3.4735 - val_accuracy: 1.0000 - val_loss: 1.4766e-06
Epoch 2/2
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 4s/step - accuracy: 0.9059 - loss: 1.1438 - val_accuracy: 1.0000 - val_loss: 0.0021
Epoch 1/2
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 4s/step - accuracy: 0.9382 - loss: 0.6040 - val_accuracy: 1.0000 - val_loss: 1.2461e-05
Epoch 2/2
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 4s/step - accuracy: 0.9086 - loss: 0.9895 - val_accuracy: 0.7742 - val_loss: 1.7523
