In [2]:
import os

In [3]:
%pwd

'/home/kalema/Projects/MIIA-Pothole-Image-classification/research'

In [4]:
# Step up to the project root only while the kernel is still inside research/.
# A bare relative chdir is not idempotent: re-running the cell would keep
# climbing the directory tree and break the relative config/artifact paths.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [5]:
%pwd

'/home/kalema/Projects/MIIA-Pothole-Image-classification'

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """
    Immutable bundle of paths and hyper-parameters consumed by the training stage.

    The dataclass is frozen, so fields cannot be reassigned after construction.

    Attributes:
        root_dir (Path): Directory for training artifacts; created by
            ConfigurationManager.get_training_config().
        trained_model_path (Path): Filepath handed to the ModelCheckpoint callback, i.e.
            where the trained model is written.
        base_model_path (Path): Filepath of the base model that Training.get_base_model() loads.
        training_data (Path): Directory of training images passed to flow_from_directory.
        params_epochs (int): The number of epochs for training.
        params_batch_size (int): The batch size for training.
        params_image_size (list): Image dimensions. Training passes every entry except the
            last as the generator target size (presumably [height, width, channels] --
            confirm against params.yaml).
        params_is_augmentation (bool): Whether data augmentation is applied during training.
    """
    root_dir: Path
    trained_model_path: Path
    base_model_path: Path
    training_data: Path
    params_epochs: int
    params_batch_size: int
    params_image_size: list
    params_is_augmentation: bool

In [7]:
from potholeClassifier.constants import *
from potholeClassifier.utils.common import read_yaml, create_directories
import tensorflow as tf

2024-04-29 12:59:29.785443: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-29 12:59:30.361599: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-29 12:59:30.364087: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
from pathlib import Path

class ConfigurationManager:
    """Load the project's YAML configuration and build stage configuration objects.

    On construction the configuration file and the parameters file are read with
    ``read_yaml`` and the artifacts root directory is created.

    Attributes:
        config: Contents of the configuration file, as returned by ``read_yaml``.
        params: Contents of the parameters file, as returned by ``read_yaml``.

    Methods:
        get_training_config(): Builds the TrainingConfig used by the training stage.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath (optional): The filepath of the configuration file. Defaults to CONFIG_FILE_PATH.
            params_filepath (optional): The filepath of the parameters file. Defaults to PARAMS_FILE_PATH.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """
        Assemble the TrainingConfig from the loaded configuration and parameters.

        Reads the ``training``, ``prepare_base_model`` and ``data_ingestion`` sections of the
        configuration plus the EPOCHS, BATCH_SIZE, IMAGE_SIZE and AUGMENTATION parameters,
        creates the training root directory, and points the training data at
        ``<unzip_dir>/dataset/train``.

        Returns:
            TrainingConfig: The paths and hyper-parameters for the training stage.

        Note:
            No validation is performed here. A missing section or parameter surfaces as
            whatever error the object returned by ``read_yaml`` raises on attribute access.
        """
        training = self.config.training
        base_model_config = self.config.prepare_base_model
        params = self.params

        root_dir = Path(training.root_dir)
        # Compose the path with pathlib directly instead of a string join that is
        # re-wrapped in Path afterwards; the resulting path is the same.
        training_data = Path(self.config.data_ingestion.unzip_dir) / "dataset" / "train"

        create_directories([root_dir])

        return TrainingConfig(
            root_dir=root_dir,
            trained_model_path=Path(training.trained_model_path),
            base_model_path=Path(base_model_config.base_model_path),
            training_data=training_data,
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE,
            params_image_size=params.IMAGE_SIZE,
            params_is_augmentation=params.AUGMENTATION
        )

In [9]:
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time
from datetime import datetime
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
import numpy as np

In [12]:

class Training:
    """
    Train the prepared base model on the image dataset described by a TrainingConfig.

    Typical use is ``get_base_model()`` -> ``train_valid_generator()`` -> ``train()``;
    ``train()`` reads the attributes the first two calls set.

    Attributes:
        config (TrainingConfig): The configuration for training the model.
        model: The Keras model, set by get_base_model().
        train_generator: Training data iterator, set by train_valid_generator().
        valid_generator: Validation data iterator, set by train_valid_generator().
        steps_per_epoch (int): Set by train().
        validation_steps (int): Set by train().

    Methods:
        get_base_model(): Loads the updated base model for training.
        train_valid_generator(): Prepares data generators for training and validation.
        train(): Fits the model, checkpointing the best weights.

    """
    def __init__(self, config: TrainingConfig):
        """
        Store the training configuration; no model or data is loaded yet.

        Args:
            config (TrainingConfig): The configuration for training the model.
        """
        self.config = config

    def get_base_model(self):
        """
        Load the base model from ``config.base_model_path`` into ``self.model``.
        """
        self.model = tf.keras.models.load_model(
            self.config.base_model_path
        )

    def train_valid_generator(self):
        """
        Build the training and validation iterators over ``config.training_data``.

        Both iterators rescale pixels by 1/255 and share the same 80/20 split. Random
        augmentation, when enabled in the configuration, is applied to the training
        subset only: the validation subset comes from a separate, non-augmenting
        generator and is not shuffled, so validation metrics are computed on
        unmodified images in a stable order.
        """
        # Settings shared by both generators. validation_split must be identical on
        # the two generators so that the 'training' and 'validation' subsets are
        # complementary.
        datagenerator_kwargs = dict(
            rescale=1./255,
            validation_split=0.20
        )
        dataflow_kwargs = dict(
            # Drop the last entry of the configured image size for target_size
            # (presumably the channel count -- confirm against params.yaml).
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            class_mode='categorical',
        )

        augmentation_kwargs = {}
        if self.config.params_is_augmentation:
            augmentation_kwargs = dict(
                rotation_range=40,
                horizontal_flip=True,
                # NOTE(review): integer shift ranges are interpreted as pixel offsets
                # by Keras, not fractions of the image size -- confirm this is intended.
                width_shift_range=20,
                height_shift_range=20,
                shear_range=0.2,
                zoom_range=0.2,
            )

        train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **augmentation_kwargs,
            **datagenerator_kwargs
        )
        # Validation data must never be augmented, otherwise val_loss (which drives
        # checkpointing and learning-rate reduction) is measured on distorted images.
        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset='training',
            shuffle=True,
            **dataflow_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset='validation',
            shuffle=False,
            **dataflow_kwargs
        )

    def train(self):
        """Fit ``self.model`` on the training iterator, validating every epoch.

        Requires get_base_model() and train_valid_generator() to have been called.
        The best model seen so far is written to ``config.trained_model_path`` by a
        ModelCheckpoint callback, and ReduceLROnPlateau scales the learning rate by
        sqrt(0.1) after 5 epochs without improvement, down to a floor of 1e-5.
        """
        # Whole batches per epoch; clamp to 1 so a subset smaller than one batch
        # still runs a step instead of passing 0 steps to fit().
        self.steps_per_epoch = max(
            1, self.train_generator.samples // self.train_generator.batch_size
        )
        self.validation_steps = max(
            1, self.valid_generator.samples // self.valid_generator.batch_size
        )

        lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                                       cooldown=0,
                                       patience=5,
                                       min_lr=1e-5)

        checkpoint = ModelCheckpoint(filepath=self.config.trained_model_path,
                                     verbose=1,
                                     save_best_only=True)

        callbacks = [checkpoint, lr_reducer]

        self.model.fit(
            self.train_generator,
            validation_data=self.valid_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            callbacks=callbacks,
            verbose=1
        )

        

In [13]:
# Training stage: load the configuration, restore the base model, build the data
# generators and fit. There is deliberately no try/except around these calls:
# any failure should surface directly with its original traceback.
config = ConfigurationManager()
training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()
training.train_valid_generator()
training.train()

[2024-04-29 13:02:41,792: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-04-29 13:02:41,803: INFO: common: yaml file: params.yaml loaded successfully]
[2024-04-29 13:02:41,811: INFO: common: Created directory at: artifacts]
[2024-04-29 13:02:41,812: INFO: common: Created directory at: artifacts/training]


Found 576 images belonging to 2 classes.
Found 144 images belonging to 2 classes.
Epoch 1/30


2024-04-29 13:02:42.784698: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2024-04-29 13:02:44.490954: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 201867264 exceeds 10% of free system memory.
2024-04-29 13:02:45.066370: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 97329152 exceeds 10% of free system memory.
2024-04-29 13:02:46.419203: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 97329152 exceeds 10% of free system memory.
2024-04-29 13:02:47.800641: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 100933632 exceeds 10% of free system memory.
2024-04-29 13:02:47.800767: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 201867264 exceeds 10%

 3/36 [=>............................] - ETA: 2:21 - loss: 1.2256 - accuracy: 0.5208

KeyboardInterrupt: 