In [1]:
import os
import sys

In [2]:
# Move the working directory one level up so that the relative paths used by
# the cells below resolve from the parent directory of the notebook's folder.
# NOTE: the path is relative, so re-running this cell moves up one more level
# each time; run it only once per kernel session.
os.chdir('../')

In [3]:
from dataclasses import dataclass
from pathlib import Path
from typing import List

In [4]:
@dataclass
class ModelTrainerConfig:
    """Settings consumed by ``ModelTrainer``.

    Instances are assembled by
    ``ModelConfigurationManager.get_model_trainer_config`` from the
    ``model_trainer`` section of the config file and the ``model_params``
    section of the params file.
    """

    # Directory for this stage; passed to create_directories() by the manager.
    root_dir: Path
    # File path handed to the ModelCheckpoint callback.
    model_path: str
    # File loaded with np.load() to build the training dataset.
    train_gen_path: str
    # File loaded with np.load() to build the validation dataset.
    val_gen_path: str
    # Passed to mlflow.set_tracking_uri().
    mlflow_uri: str
    # Passed to mlflow.set_experiment().
    experiment_name: str
    # Converted to a tuple and used as the EfficientNetB1 input_shape.
    input_shape: list
    # Number of units in the final softmax Dense layer.
    num_classes: int
    # Learning rate of the Adam optimizer.
    learning_rate: float
    # Number of epochs passed to model.fit().
    epochs: int
    # Rate of the Dropout layer placed before the classification layer.
    dropout_rate: float
    # Loss passed to model.compile().
    loss: str
    # Quantity monitored by the checkpoint, early-stopping and LR callbacks.
    monitor: str
    # EarlyStopping patience.
    patience: int
    # ReduceLROnPlateau factor.
    reduce_lr_factor: float
    # ReduceLROnPlateau patience.
    reduce_lr_patience: int
    # ReduceLROnPlateau min_delta.
    reduce_lr_min_delta: float

In [5]:
from NeuroScan.utils.helpers import *
from NeuroScan.constants.paths import *

In [6]:
class ModelConfigurationManager:
    """Reads the config and params YAML files and builds stage configs."""

    def __init__(self, config_file=CONFIG_PATH, params_file=PARAMS_PATH):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_file: Path of the configuration YAML (paths, MLflow settings).
            params_file: Path of the parameters YAML (model hyperparameters).
        """
        self.config = read_yaml(config_file)
        self.params = read_yaml(params_file)
        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the ``ModelTrainerConfig`` for the training stage.

        Passes the trainer's ``root_dir`` to ``create_directories`` and then
        copies the needed entries from the ``model_trainer`` config section
        and the ``model_params`` params section.

        Returns:
            A populated ``ModelTrainerConfig``.
        """
        trainer_section = self.config.model_trainer
        hyperparams = self.params.model_params

        create_directories([trainer_section.root_dir])

        settings = {"root_dir": Path(trainer_section.root_dir)}

        # (source section, attribute names) in the order the values are read.
        field_sources = (
            (trainer_section, ("model_path", "mlflow_uri", "experiment_name")),
            (
                hyperparams,
                (
                    "input_shape",
                    "num_classes",
                    "learning_rate",
                    "epochs",
                    "dropout_rate",
                    "loss",
                    "monitor",
                    "patience",
                    "reduce_lr_factor",
                    "reduce_lr_patience",
                    "reduce_lr_min_delta",
                ),
            ),
            (trainer_section, ("train_gen_path", "val_gen_path")),
        )
        for section, names in field_sources:
            for name in names:
                settings[name] = getattr(section, name)

        return ModelTrainerConfig(**settings)

In [7]:
import numpy as np
from NeuroScan.utils.logging import logger
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB1
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalMaxPooling2D, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import mlflow
import mlflow.tensorflow

class ModelTrainer:
    """Builds, trains and tracks an EfficientNetB1-based classifier.

    Constructing a trainer configures MLflow, opens a run and loads the
    pre-saved training/validation data. ``train()`` then builds the model if
    needed, fits it, logs summary metrics and the model to MLflow, and closes
    the run.
    """

    # Batch size applied to both the training and validation datasets.
    BATCH_SIZE = 32

    def __init__(self, config: ModelTrainerConfig):
        """Configure MLflow, start a run and load the datasets.

        Args:
            config: Paths, MLflow settings and hyperparameters for training.

        Raises:
            FileNotFoundError: If either saved dataset file is missing.
            Exception: Any other error raised while loading the datasets.
        """
        self.config = config
        self.model = None
        self.history = None
        self.train_generator = None
        self.valid_generator = None

        mlflow.set_tracking_uri(self.config.mlflow_uri)
        mlflow.set_experiment(self.config.experiment_name)

        self.run = mlflow.start_run()
        try:
            self._initialize_generators()
        except Exception:
            # The run was opened just above; close it so a failed construction
            # does not leave an active run behind in this process.
            mlflow.end_run(status="FAILED")
            raise

    def _load_dataset(self, path):
        """Load one saved data file and wrap it as a batched ``tf.data.Dataset``.

        The file is read with ``np.load(...).item()`` and must yield a mapping
        with ``'data'`` and ``'labels'`` entries.
        """
        # SECURITY: allow_pickle=True unpickles the file contents, which can
        # execute arbitrary code. Only load files produced by this project's
        # own data-transformation step.
        payload = np.load(path, allow_pickle=True).item()
        # NOTE(review): the dataset is not shuffled here, and Keras does not
        # shuffle tf.data inputs in fit(). Confirm the saved arrays are
        # already shuffled.
        dataset = tf.data.Dataset.from_tensor_slices((payload['data'], payload['labels']))
        return dataset.batch(self.BATCH_SIZE)

    def _initialize_generators(self):
        """Initializes the training and validation datasets from saved files.

        Raises:
            FileNotFoundError: If either configured file does not exist.
        """
        try:
            train_path = self.config.train_gen_path
            val_path = self.config.val_gen_path
            if not (os.path.exists(train_path) and os.path.exists(val_path)):
                logger.error("Saved generator files not found. Please run data_transformation first.")
                raise FileNotFoundError("Generator files missing.")

            logger.info("Loading pre-saved generators...")
            self.train_generator = self._load_dataset(train_path)
            self.valid_generator = self._load_dataset(val_path)
            logger.info("Pre-saved generators loaded successfully.")
        except Exception as e:
            logger.error(f"Error initializing generators: {e}")
            raise

    def build_model(self):
        """Constructs and compiles the model, storing it on ``self.model``.

        The network is an ImageNet-initialised EfficientNetB1 without its top,
        followed by global max pooling, dropout and a softmax Dense layer.
        """
        try:
            base_model = EfficientNetB1(
                weights='imagenet',
                include_top=False,
                input_shape=tuple(self.config.input_shape)
            )
            features = GlobalMaxPooling2D()(base_model.output)
            features = Dropout(self.config.dropout_rate)(features)
            outputs = Dense(self.config.num_classes, activation="softmax")(features)

            self.model = Model(inputs=base_model.input, outputs=outputs)
            self.model.compile(
                optimizer=Adam(learning_rate=self.config.learning_rate),
                loss=self.config.loss,
                metrics=['accuracy']
            )
            logger.info("Model architecture built and compiled successfully.")

        except Exception as e:
            logger.error(f"Error building model: {e}")
            raise

    def setup_callbacks(self):
        """Configures callbacks for training.

        Returns:
            A list with the ModelCheckpoint, EarlyStopping and
            ReduceLROnPlateau callbacks, all monitoring ``config.monitor``.
        """
        try:
            checkpoint = ModelCheckpoint(
                self.config.model_path,
                monitor=self.config.monitor,
                save_best_only=True,
                mode='auto',
                verbose=1
            )
            earlystop = EarlyStopping(
                monitor=self.config.monitor,
                patience=self.config.patience,
                mode='auto',
                verbose=1
            )
            reduce_lr = ReduceLROnPlateau(
                monitor=self.config.monitor,
                factor=self.config.reduce_lr_factor,
                patience=self.config.reduce_lr_patience,
                min_delta=self.config.reduce_lr_min_delta,
                mode='auto',
                verbose=1
            )
            return [checkpoint, earlystop, reduce_lr]

        except Exception as e:
            logger.error(f"Error setting up callbacks: {e}")
            raise

    def log_training_metrics(self, history):
        """Logs the per-epoch accuracy mean and standard deviation to MLflow.

        Args:
            history: Object returned by ``model.fit`` whose ``history`` dict
                holds ``'accuracy'`` and ``'val_accuracy'`` series.
        """
        try:
            train_acc = np.array(history.history['accuracy'])
            val_acc = np.array(history.history['val_accuracy'])
            mean_train_acc = np.mean(train_acc)
            std_train_acc = np.std(train_acc)
            mean_val_acc = np.mean(val_acc)
            std_val_acc = np.std(val_acc)
            mlflow.log_metric("mean_train_accuracy", mean_train_acc)
            mlflow.log_metric("std_train_accuracy", std_train_acc)
            mlflow.log_metric("mean_val_accuracy", mean_val_acc)
            mlflow.log_metric("std_val_accuracy", std_val_acc)
            logger.info(f"Logged training metrics: mean_train_acc={mean_train_acc:.4f}, std_train_acc={std_train_acc:.4f}, "
                        f"mean_val_acc={mean_val_acc:.4f}, std_val_acc={std_val_acc:.4f}")

        except Exception as e:
            logger.error(f"Error logging training metrics: {e}")
            raise

    def train(self):
        """Trains the model using the pre-initialized datasets.

        Builds the model first if it has not been built. The MLflow run is
        always ended when this method exits: with status FINISHED on success
        and FAILED if anything raised.

        Returns:
            Tuple ``(model, history)`` of the trained model and the object
            returned by ``model.fit``.
        """
        # Assume failure until every step has completed, so an aborted
        # training is not recorded as a successful run.
        run_status = "FAILED"
        try:
            if self.model is None:
                self.build_model()

            callbacks = self.setup_callbacks()
            logger.info("Starting model training...")
            self.history = self.model.fit(
                self.train_generator,
                epochs=self.config.epochs,
                validation_data=self.valid_generator,
                callbacks=callbacks,
                verbose=1
            )

            logger.info("Model training completed.")
            self.log_training_metrics(self.history)
            mlflow.tensorflow.log_model(self.model, "model")
            run_status = "FINISHED"
            return self.model, self.history

        except Exception as e:
            logger.error(f"Error during training: {e}")
            raise

        finally:
            mlflow.end_run(status=run_status)


In [None]:
# Driver cell: load the configuration, build the trainer (which opens the
# MLflow run and loads the saved datasets) and run training. Any failure is
# logged and then re-raised so the error is still shown for this cell.
try:
        model_config = ModelConfigurationManager()
        trainer_config = model_config.get_model_trainer_config()
        trainer = ModelTrainer(config=trainer_config)
        # train() returns the fitted model and the object returned by model.fit
        model, history = trainer.train()
except Exception as e:
        logger.error(f"Model training failed: {e}")
        raise