In [1]:
import os

In [2]:
%pwd

'd:\\GoFreeLabTechnologies\\Internship projects\\CI_CD_Pipeline\\CI_CD_Pipeline-ML_flow-DVC\\research'

In [3]:
# Step up from research/ to the project root so that relative config and
# artifact paths resolve. Guarded so that re-running this cell does not keep
# climbing the directory tree (the unguarded call moved up one level per run).
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'd:\\GoFreeLabTechnologies\\Internship projects\\CI_CD_Pipeline\\CI_CD_Pipeline-ML_flow-DVC'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of settings used by the model-evaluation stage.

    Built by ``ConfigurationManager.get_model_evaluation_config`` from the
    ``model_evaluation`` section of the config file plus the
    ``trainer_evaluation`` section of the params file.

    NOTE(review): the configuration manager forwards the values it read
    without converting them, so the ``Path``-annotated fields may arrive as
    plain strings -- confirm against ``read_yaml``.
    """
    # Directory that holds the saved model; joined with ``model`` to build the full path.
    model_path: Path
    # File name of the saved model inside ``model_path``.
    model: str
    # Directory from which the ``X_90.joblib`` and ``y.joblib`` arrays are loaded.
    load_data: Path
    # Tracking URI handed to ``mlflow.set_tracking_uri``.
    mlflow_uri: str
    # Evaluation parameters; ``batch_size`` is read from it and the whole mapping is logged to MLflow.
    params: dict

In [6]:
import dagshub
# Initialise the DagsHub integration for this repository with MLflow enabled.
# NOTE(review): presumably this wires MLflow credentials/tracking to the
# DagsHub-hosted server for the repo named below -- confirm against the
# dagshub client documentation. Owner and repo name are hard-coded here.
dagshub.init(repo_owner='ShoneJolly', repo_name='CI_CD_Pipeline-ML_flow-DVC', mlflow=True)

# Minimal MLflow smoke test, kept commented out for reference only:
# import mlflow
# with mlflow.start_run():
#   mlflow.log_param('parameter name', 'value')
#   mlflow.log_metric('metric name', 1)

In [7]:
from ImageForgeryDetection.constants import *
from ImageForgeryDetection.utils.common import read_yaml, create_directories, save_json

In [8]:
class ConfigurationManager:
    """Loads the YAML config and params files and builds stage configurations."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files once and keep the parsed results.

        Args:
            config_filepath: Location of the main configuration file.
            params_filepath: Location of the parameters file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Assemble the settings for the model-evaluation stage.

        Returns:
            A ``ModelEvaluationConfig`` populated from the ``model_evaluation``
            section of the config and the ``trainer_evaluation`` section of
            the params.
        """
        evaluation_section = self.config.model_evaluation
        evaluation_params = self.params.trainer_evaluation

        return ModelEvaluationConfig(
            model_path=evaluation_section.model_path,
            model=evaluation_section.model,
            load_data=evaluation_section.load_data,
            mlflow_uri=evaluation_section.mlflow_uri,
            params=evaluation_params,
        )

In [9]:
from ImageForgeryDetection import logger
import joblib
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence
import mlflow
import mlflow.keras
from urllib.parse import urlparse
from pathlib import Path
from ImageForgeryDetection.utils.common import save_json
import os
import tempfile

  _warn(("h5py is running against HDF5 {0} when it was built against {1}, "


In [None]:
class ModelEvaluation:
    """Evaluates the trained Keras model on the held-out split and logs the results.

    Typical usage is ``evaluation()`` followed by ``log_into_mlflow()``:
    the first loads data and model, computes the scores and writes
    ``scores.json``; the second pushes params, metrics and the model file to
    MLflow and registers a new model version.
    """

    # Shape each flattened sample is reshaped to before being fed to the model.
    IMAGE_SHAPE = (128, 128, 3)
    # Width of the label array (second dimension of y).
    NUM_CLASSES = 2
    # Metrics persisted to scores.json and MLflow, in output order.
    METRIC_KEYS = ("loss", "accuracy", "precision", "recall", "f1_score")
    # Name under which the model is registered in the MLflow Model Registry.
    REGISTERED_MODEL_NAME = "ImageForgeryDetectionModel"

    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the configuration; data, model and scores are filled in later."""
        self.config = config
        self.model = None
        self.X_test = None
        self.y_test = None
        self.score = None

    def load_data(self):
        """Loads test data from joblib files specified in config.

        Returns:
            Tuple ``(X, y)`` read from ``X_90.joblib`` and ``y.joblib`` inside
            ``config.load_data``.
        """
        logger.info(f"Loading test data from {self.config.load_data}")
        try:
            data_dir = Path(self.config.load_data)
            # NOTE(security): joblib.load unpickles its input, so these files
            # must only ever come from a trusted pipeline stage.
            X = joblib.load(data_dir / 'X_90.joblib')
            y = joblib.load(data_dir / 'y.joblib')
            logger.info(f"Loaded X with shape {X.shape} and y with shape {y.shape}")
            return X, y
        except Exception as e:
            logger.error(f"Error loading data: {e}")
            raise

    def split_data(self, X, y):
        """Splits data into training and testing sets and returns the test part.

        The split is 80/20, stratified on ``y``, with a fixed random state.
        NOTE(review): presumably these settings mirror the training stage so
        the same samples are held out -- confirm against the trainer.
        """
        logger.info("Splitting data into train and test sets")
        try:
            from sklearn.model_selection import train_test_split
            _, X_test, _, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42, stratify=y
            )
            logger.info(f"Test set: X_test shape {X_test.shape}, y_test shape {y_test.shape}")
            return X_test, y_test
        except Exception as e:
            logger.error(f"Error splitting data: {e}")
            raise

    def preprocess_data(self, X_test, y_test):
        """Reshapes test data for CNN input and stores it on the instance."""
        logger.info("Preprocessing test data")
        try:
            X_test = X_test.reshape(X_test.shape[0], *self.IMAGE_SHAPE)
            y_test = y_test.reshape(y_test.shape[0], self.NUM_CLASSES)
            logger.info(f"Reshaped X_test to {X_test.shape}, y_test to {y_test.shape}")
            self.X_test, self.y_test = X_test, y_test
        except Exception as e:
            logger.error(f"Error preprocessing data: {e}")
            raise

    @staticmethod
    def load_model(path: Path) -> "tf.keras.Model":
        """Loads the trained model from ``path``."""
        logger.info(f"Loading model from {path}")
        try:
            return tf.keras.models.load_model(path)
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            raise

    @staticmethod
    def _num_batches(n_samples: int, batch_size: int) -> int:
        """Number of batches needed to cover ``n_samples``, counting a final partial batch.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
        # Ceiling division: rounding down would silently drop the trailing
        # samples whenever the test set is not a multiple of the batch size.
        return -(-int(n_samples) // int(batch_size))

    @staticmethod
    def _scalar_metric(value) -> float:
        """Collapse a metric value (scalar, array or tensor) to a Python float.

        Array-valued metrics, such as a per-class F1 score, are averaged.
        """
        if hasattr(value, "numpy"):
            # Tensor-like objects expose their data through .numpy().
            value = value.numpy()
        return float(np.mean(value))

    def _collect_metrics(self) -> dict:
        """Build the flat ``{metric name: float}`` mapping from ``self.score``.

        Metrics missing from the score default to ``0.0``.

        Raises:
            RuntimeError: If no evaluation has been run yet.
        """
        if self.score is None:
            raise RuntimeError(
                "No evaluation scores available; run evaluation() before saving or logging."
            )
        return {key: self._scalar_metric(self.score.get(key, 0.0)) for key in self.METRIC_KEYS}

    def get_test_generator(self):
        """Returns a Sequence generator that yields the test data batch by batch.

        Raises:
            RuntimeError: If ``preprocess_data`` has not populated the test data.
        """
        if self.X_test is None or self.y_test is None:
            raise RuntimeError("Test data has not been prepared; call preprocess_data() first.")

        count_batches = self._num_batches

        class TestGenerator(Sequence):
            def __init__(self, X, y, batch_size, **kwargs):
                super().__init__(**kwargs)  # Initialize Sequence base class
                self.X = X
                self.y = y
                self.batch_size = batch_size

            def __len__(self):
                # Includes the last, possibly smaller, batch so that every
                # test sample contributes to the reported scores.
                return count_batches(len(self.X), self.batch_size)

            def __getitem__(self, index):
                start = index * self.batch_size
                stop = start + self.batch_size
                return np.asarray(self.X[start:stop]), np.asarray(self.y[start:stop])

        return TestGenerator(self.X_test, self.y_test, self.config.params['batch_size'])

    def evaluation(self):
        """Evaluates the model on the test split and saves the scores."""
        logger.info("Starting model evaluation")
        try:
            # Load and preprocess data
            X, y = self.load_data()
            X_test, y_test = self.split_data(X, y)
            self.preprocess_data(X_test, y_test)

            # Load model
            model_path = Path(self.config.model_path) / self.config.model
            self.model = self.load_model(model_path)

            # Create test generator
            test_generator = self.get_test_generator()

            # Evaluate model. The generator already yields batches, so
            # batch_size must not be passed to evaluate() as well.
            logger.info("Evaluating model on test data")
            self.score = self.model.evaluate(test_generator, return_dict=True)
            logger.info(f"Evaluation scores: {self.score}")

            # Save scores
            self.save_score()

        except Exception as e:
            logger.error(f"Error during evaluation: {e}")
            raise

    def save_score(self):
        """Saves evaluation scores to ``scores.json`` in the working directory."""
        logger.info("Saving evaluation scores")
        try:
            save_json(path=Path("scores.json"), data=self._collect_metrics())
            logger.info("Scores saved successfully")
        except Exception as e:
            logger.error(f"Error saving scores: {e}")
            raise

    def _register_model(self, run_id):
        """Create a new version of the registered model from this run's ``model`` artifact.

        Must be called while the MLflow run is still active, because the
        artifact URI is resolved from the active run.
        """
        name = self.REGISTERED_MODEL_NAME
        logger.info(f"Registering model in MLflow as {name}")
        client = mlflow.tracking.MlflowClient()

        # create_model_version expects the registered model to exist already,
        # so make sure it does. Failure here is normal when the model is
        # already registered; any real problem resurfaces in the call below.
        try:
            client.create_registered_model(name)
        except mlflow.exceptions.MlflowException as e:
            logger.info(f"Registered model {name} was not created (it most likely already exists): {e}")

        try:
            result = client.create_model_version(
                name=name,
                source=mlflow.get_artifact_uri('model'),
                run_id=run_id
            )
            logger.info(f"Model registered as {name}, version {result.version}")
        except mlflow.exceptions.RestException as e:
            # RestException exposes the HTTP status through a method; reading
            # a status_code attribute here would raise and hide the real error.
            logger.error(f"Failed to register model: {e.json}, Status Code: {e.get_http_status_code()}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error during model registration: {str(e)}")
            raise

    def log_into_mlflow(self):
        """Logs parameters, metrics, and model to MLflow.

        Requires ``evaluation()`` to have run first so that both the scores
        and the loaded model are available.

        Raises:
            RuntimeError: If scores or model are missing.
        """
        logger.info("Logging to MLflow")
        try:
            # Validate state before opening a run so a premature call does
            # not leave an empty, failed run behind.
            metrics = self._collect_metrics()
            if self.model is None:
                raise RuntimeError("No model loaded; run evaluation() before logging to MLflow.")

            mlflow.set_tracking_uri(self.config.mlflow_uri)
            tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
            logger.info(f"MLflow tracking URI: {self.config.mlflow_uri}")

            with mlflow.start_run() as run:
                mlflow.log_params(self.config.params)
                mlflow.log_metrics(metrics)

                # Save model to a temporary .keras file and upload it
                with tempfile.TemporaryDirectory() as tmpdirname:
                    temp_model_path = os.path.join(tmpdirname, "model.keras")
                    logger.info(f"Saving model to temporary path: {temp_model_path}")
                    self.model.save(temp_model_path)
                    if not os.path.exists(temp_model_path):
                        raise FileNotFoundError(f"Failed to save model at {temp_model_path}")
                    logger.info(f"Verified model saved at {temp_model_path}, size: {os.path.getsize(temp_model_path)} bytes")

                    # Log model as artifact
                    logger.info("Logging model as MLflow artifact")
                    mlflow.log_artifact(temp_model_path, artifact_path="model")
                    logger.info("Model logged to MLflow as artifact successfully")

                # Register the model unless tracking goes to a local file store
                if tracking_url_type_store != "file":
                    self._register_model(run.info.run_id)

        except Exception as e:
            logger.error(f"Error logging to MLflow: {e}")
            raise

In [13]:
# Run the evaluation stage end to end: build the config, score the model,
# then push the results to MLflow. Any failure is logged and re-raised.
try:
    config = ConfigurationManager()
    eval_config = config.get_model_evaluation_config()
    evaluation = ModelEvaluation(eval_config)
    evaluation.evaluation()
    evaluation.log_into_mlflow()
except Exception as exc:
    logger.error(f"Pipeline failed: {exc}")
    raise

[2025-08-09 19:49:18,689: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-08-09 19:49:18,769: INFO: common: yaml file: params.yaml loaded successfully]
[2025-08-09 19:49:18,769: INFO: 373372311: Starting model evaluation]
[2025-08-09 19:49:18,777: INFO: 373372311: Loading test data from artifacts/data_preprocessing/pickle]
[2025-08-09 19:49:31,665: INFO: 373372311: Loaded X with shape (9501, 49152) and y with shape (9501, 2)]
[2025-08-09 19:49:31,667: INFO: 373372311: Splitting data into train and test sets]
[2025-08-09 19:49:36,887: INFO: 373372311: Test set: X_test shape (1901, 49152), y_test shape (1901, 2)]
[2025-08-09 19:49:37,237: INFO: 373372311: Preprocessing test data]
[2025-08-09 19:49:37,241: INFO: 373372311: Reshaped X_test to (1901, 128, 128, 3), y_test to (1901, 2)]
[2025-08-09 19:49:37,258: INFO: 373372311: Loading model from artifacts\model_trainer\model\model.keras]
[2025-08-09 19:49:38,713: INFO: 373372311: Evaluating model on test data]
[1m59/

2025/08/09 19:50:45 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ImageForgeryDetectionModel, version 5


[2025-08-09 19:50:45,354: INFO: 373372311: Model registered as ImageForgeryDetectionModel, version 5]
🏃 View run intrigued-robin-655 at: https://dagshub.com/ShoneJolly/CI_CD_Pipeline-ML_flow-DVC.mlflow/#/experiments/0/runs/517a7bb8e7d6416c86dc3e25ac000ae6
🧪 View experiment at: https://dagshub.com/ShoneJolly/CI_CD_Pipeline-ML_flow-DVC.mlflow/#/experiments/0
