In [1]:
# Step the kernel's working directory up one level (presumably to the project
# root, so that relative paths used later resolve from there — TODO confirm).
# NOTE: this is not idempotent. Re-running the cell without restarting the
# kernel moves up one more level each time.
import os 
os.chdir('../') 
# IPython magic: show the working directory that is now in effect.
%pwd

'/home/paladin/Downloads/Sensor-Fault-Detection'

In [2]:
from pathlib import Path
from dataclasses import dataclass

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of settings consumed by the model-evaluation stage."""

    # Evaluation-stage directory (model_evaluation.ROOT_DIR); ModelEvaluation
    # also hands it to ModelResolver as the best-model directory.
    root_dir: Path    
    # Filled from model_trainer.ROOT_DIR, i.e. the directory holding the
    # trained model versions rather than a single model file.
    trained_model_path: Path    
    # Validated train / test CSV files (data_validation section).
    valid_train_file: Path
    valid_test_file: Path
    # Destination of the YAML evaluation report.
    evaluation_report_file: Path
    # The latest model is accepted only if its score gain exceeds this value.
    model_evaluation_changed_threshold: float
    # Name of the label column separated from the input features.
    target_column: str

In [3]:
from sensorFaultDetection.constants import *
from sensorFaultDetection.utils import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    """Read the project's YAML settings and build stage-specific config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        secret_filepath=SECRET_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Load the four YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: YAML file with the per-stage path settings.
            secret_filepath: YAML file with secrets.
            schema_filepath: YAML file with the data schema.
            params_filepath: YAML file with tunable parameters.
        """
        self.config = read_yaml(config_filepath)
        self.secret = read_yaml(secret_filepath)
        self.schema = read_yaml(schema_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Assemble the settings for the model-evaluation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            A ``ModelEvaluationConfig`` populated from the ``model_evaluation``,
            ``model_trainer`` and ``data_validation`` config sections plus the
            parameters file.
        """
        evaluation_section = self.config.model_evaluation
        evaluation_root = evaluation_section.ROOT_DIR

        create_directories([evaluation_root])

        return ModelEvaluationConfig(
            root_dir=evaluation_root,
            trained_model_path=self.config.model_trainer.ROOT_DIR,
            valid_train_file=self.config.data_validation.VALID_TRAIN_FILE,
            valid_test_file=self.config.data_validation.VALID_TEST_FILE,
            evaluation_report_file=evaluation_section.EVALUATION_REPORT_FILE,
            model_evaluation_changed_threshold=self.params.MODEL_EVALUATION_CHANGED_THRESHOLD,
            target_column=self.params.TARGET_COLUMN,
        )

In [5]:
import sys
import numpy as np
import pandas as pd
import shutil
from sensorFaultDetection. logger import logging
from sensorFaultDetection.exception import CustomException
from sensorFaultDetection.utils import load_pickle, save_pickle, classifier_performance_report, write_yaml_file

In [6]:
class TargetValueMapping:
    """Integer codes for the two target labels: ``neg`` -> 0 and ``pos`` -> 1."""

    def __init__(self):
        self.neg: int = 0
        self.pos: int = 1

    def to_dict(self):
        """Return the label -> code mapping (the instance's own attribute dict)."""
        return vars(self)

    def reverse_mapping(self):
        """Return the inverse lookup, code -> label."""
        return {code: label for label, code in self.to_dict().items()}


class ModelResolver:
    """Locate versioned ``model.pkl`` files in the trained- and best-model directories.

    Each directory is expected to hold one sub-directory per model version,
    each containing a ``model.pkl``. Versions are ordered by sub-directory
    name; this assumes the names (presumably timestamps) sort chronologically
    as plain strings — TODO confirm against the trainer's naming scheme.
    """

    def __init__(self, trained_model_dir, best_model_dir):
        self.trained_model_dir = trained_model_dir
        self.best_model_dir = best_model_dir

    @staticmethod
    def is_dir_empty(path):
        """Return ``True`` when ``path`` is a directory containing at least one entry.

        NOTE: the return value is the opposite of what the name suggests. An
        empty directory, a file, or a missing path all yield ``False``. The
        behaviour is kept as-is so existing callers keep working.
        """
        return os.path.isdir(path) and bool(os.listdir(path))

    @staticmethod
    def _list_versions(model_dir):
        """Return the sub-directory names of ``model_dir`` in ascending order.

        ``os.listdir`` gives no ordering guarantee, so the names are sorted
        explicitly. Plain files are ignored so that a stray file in the
        directory is never mistaken for a model version. A missing or
        non-directory ``model_dir`` yields an empty list.
        """
        if not os.path.isdir(model_dir):
            return []
        return sorted(
            entry
            for entry in os.listdir(model_dir)
            if os.path.isdir(os.path.join(model_dir, entry))
        )

    def get_latest_model_path(self, model_dir) -> str:
        """Return ``<model_dir>/<latest version>/model.pkl``.

        The returned path is not checked for existence.

        Raises:
            CustomException: if ``model_dir`` has no version sub-directory.
        """
        try:
            versions = self._list_versions(model_dir)
            if not versions:
                raise FileNotFoundError(f"No model version directory found in {model_dir!r}")
            return os.path.join(model_dir, versions[-1], 'model.pkl')
        except Exception as e:
            raise CustomException(e, sys) from e

    def is_model_exists(self) -> bool:
        """Check that both a latest trained model and a best model are available.

        Side effect: when the best-model directory holds no version yet, the
        latest trained model is copied into it as the initial best model.

        Returns:
            ``True`` when the latest trained model and the best model both
            exist on disk, ``False`` otherwise.

        Raises:
            CustomException: on any unexpected filesystem error.
        """
        try:
            trained_versions = self._list_versions(self.trained_model_dir)
            if not trained_versions:
                return False

            latest_version = trained_versions[-1]
            latest_model_path = os.path.join(self.trained_model_dir, latest_version, 'model.pkl')
            if not os.path.exists(latest_model_path):
                return False

            if not self._list_versions(self.best_model_dir):
                # Bootstrap: no best model yet, so promote the latest trained one.
                destination_file = os.path.join(self.best_model_dir, latest_version)
                os.makedirs(destination_file, exist_ok=True)
                shutil.copy(latest_model_path, destination_file)
                logging.info(f'There was no best model. Hence, a new best model saved to {destination_file}!')

            best_model_path = self.get_latest_model_path(self.best_model_dir)
            return os.path.exists(best_model_path)

        except CustomException:
            # Already wrapped by a helper; do not wrap a second time.
            raise
        except Exception as e:
            # The original built the exception without raising it, which hid
            # failures and made the method return None.
            raise CustomException(e, sys) from e

            
class ModelEvaluation:
    """Compare the latest trained model with the current best model and promote it if better."""

    def __init__(self, config: ModelEvaluationConfig):
        self.config = config

    @staticmethod
    def read_data(file_path) -> pd.DataFrame:
        """Load a CSV file into a DataFrame.

        Raises:
            CustomException: if the file cannot be read.
        """
        try:
            return pd.read_csv(file_path)
        except Exception as e:
            raise CustomException(e, sys) from e

    def initiate_model_evaluation(self) -> None:
        """Score the best and latest models on train+test data and write a YAML report.

        Steps:
            1. Read the validated train and test CSV files and concatenate them.
            2. Resolve the latest trained model and the current best model;
               log a message and return early when none is available.
            3. Build a performance report for both models on the combined data.
            4. If the latest model's score exceeds the best model's by more
               than ``model_evaluation_changed_threshold``, overwrite the best
               model and store the promoted model's metrics next to it.
            5. Write the evaluation report to ``evaluation_report_file``.

        Raises:
            CustomException: wrapping any error raised along the way.
        """
        try:
            # valid train and test file dataframes
            train_dataframe = self.read_data(self.config.valid_train_file)
            logging.info(f"Train data is read from {self.config.valid_train_file}!")
            test_dataframe = self.read_data(self.config.valid_test_file)
            logging.info(f"Test data is read from {self.config.valid_test_file}!")

            # Performance is measured on the whole data set (train + test).
            df = pd.concat([train_dataframe, test_dataframe])
            input_feature_df = df.drop(columns=self.config.target_column)
            target_feature_df = df[self.config.target_column].replace(TargetValueMapping().to_dict())

            # root_dir of the evaluation stage doubles as the best-model directory.
            model_resolver = ModelResolver(self.config.trained_model_path, self.config.root_dir)
            if not model_resolver.is_model_exists():
                logging.info("WARNING: There is no trained model path available!")
                return None

            latest_model_path = model_resolver.get_latest_model_path(self.config.trained_model_path)
            latest_model = load_pickle(latest_model_path)
            best_model_path = model_resolver.get_latest_model_path(self.config.root_dir)
            best_model = load_pickle(best_model_path)

            y_true = np.array(target_feature_df)
            y_best_pred = best_model.predict(input_feature_df)
            y_latest_pred = latest_model.predict(input_feature_df)
            labels = ["Negative", "Positive"]

            best_metric_table = classifier_performance_report(
                y_true=y_true,
                y_pred=y_best_pred,
                path=None,
                classes=labels,
            )
            logging.info("Model performance metrics for best model is completed!")

            latest_metric_table = classifier_performance_report(
                y_true=y_true,
                y_pred=y_latest_pred,
                path=None,
                classes=labels,
            )
            logging.info("Model performance metrics for latest model is completed!")

            # Difference of the last 'f1-score' entry of each report
            # (presumably an aggregate row — TODO confirm against
            # classifier_performance_report). The variable and report key say
            # "accuracy" for historical reasons.
            improved_accuracy = (
                latest_metric_table['f1-score'].values[-1]
                - best_metric_table['f1-score'].values[-1]
            )

            is_model_accepted = bool(improved_accuracy > self.config.model_evaluation_changed_threshold)
            if is_model_accepted:
                logging.info("Latest model performs better than the old version!")
                # dirname() takes exactly one argument; the file name is joined
                # onto the resulting directory afterwards.
                best_model_dir = os.path.dirname(best_model_path)

                # Persist the metrics of the model being promoted, so that the
                # stored CSV describes the stored model.pkl.
                classifier_performance_report(
                    y_true=y_true,
                    y_pred=y_latest_pred,
                    path=os.path.join(best_model_dir, 'performance_metrics.csv'),
                    classes=labels,
                )

                new_best_model_path = os.path.join(best_model_dir, 'model.pkl')
                save_pickle(path=new_best_model_path, obj=latest_model)
                logging.info("Best model is replaced by a new vesion!")
            else:
                logging.info("Latest model does not perform better than the old version!")

            evaluation_report = {
                'is_model_accepted': is_model_accepted,
                'improved_accuracy': float(improved_accuracy),
                'best_model_path': best_model_path,
                'latest_model_path': latest_model_path,
            }

            write_yaml_file(path=self.config.evaluation_report_file, content=evaluation_report, replace=True)

        except CustomException:
            # Already wrapped by a helper; do not wrap a second time.
            raise
        except Exception as e:
            raise CustomException(e, sys) from e
    
    

In [7]:
import sys
from sensorFaultDetection.exception import CustomException

In [8]:
# Run the model-evaluation stage end to end.
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    model_evaluation.initiate_model_evaluation()
except CustomException:
    # Already a project exception; let it surface unchanged.
    raise
except Exception as e:
    # The original built the exception without raising it, so every failure in
    # the pipeline was silently discarded and the cell looked successful.
    raise CustomException(e, sys) from e