In [1]:
import os
# Move the working directory one level up; the recorded %pwd output below shows it
# ends up in the repository directory (presumably the notebook lives in a subfolder).
# NOTE(review): the path is relative to the *current* directory, so re-running this
# cell in the same kernel moves up one more level each time. Run it once per session.
os.chdir('../')
%pwd

'/home/paladin/Downloads/Sensor-Fault-Detection'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainerConfig:
    """Settings consumed by the model-training stage (see ``ModelTrainer``).

    Instances are built by ``ConfigurationManager.get_model_trainer_config``.

    NOTE(review): the path fields are annotated as ``Path``, but the configuration
    manager passes through whatever ``read_yaml`` yields without converting it --
    presumably plain strings; confirm before relying on ``Path`` methods.
    """

    # Root directory of this stage; created when the config is built.
    root_dir: Path
    # Arrays loaded with load_numpy_array; the last column is used as the target
    # and all preceding columns as features.
    train_npy_file: Path
    test_npy_file: Path
    # Destination of the pickled SensorModel (preprocessor + trained model).
    trained_model_path: Path
    # Output paths handed to confusion_matrix_display / classifier_performance_report
    # for the training split.
    train_confusion_matrix_file: Path
    train_model_performance_file: Path
    # Same outputs for the test split.
    test_confusion_matrix_file: Path
    test_model_performance_file: Path
    # Training is rejected when the train f1-score falls below this value.
    expected_accuracy_threshold: float
    # Training is rejected when |train f1 - test f1| exceeds this value.
    overfit_underfit_threshold: float
    # Pickled preprocessing object that is bundled with the trained model.
    preprocessor_file: Path

In [3]:
from sensorFaultDetection.constants import *
from sensorFaultDetection.utils import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage configuration objects.

    The parsed files are kept on the instance as ``config``, ``secret``,
    ``schema`` and ``params``.
    """

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 secret_filepath=SECRET_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH
                 ):
        """Load the four YAML files and create the artifacts root directory.

        Args:
            config_filepath: YAML file holding stage settings and artifact paths.
            secret_filepath: YAML file holding secrets.
            schema_filepath: YAML file holding the data schema.
            params_filepath: YAML file holding tunable parameters/thresholds.
        """
        self.config = read_yaml(config_filepath)
        self.secret = read_yaml(secret_filepath)
        self.schema = read_yaml(schema_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the model-trainer settings and prepare its output directories.

        Creates the stage root directory plus the parent directory of every file
        the training stage writes (confusion matrices, performance reports and
        the trained model), so that none of the writes can fail on a missing
        folder.

        Returns:
            ModelTrainerConfig: populated from the ``model_trainer`` and
            ``data_transformation`` sections of the config file and from the
            thresholds in the params file.
        """
        config = self.config.model_trainer
        transformation = self.config.data_transformation

        # Every artifact the trainer writes; previously only the two
        # confusion-matrix parents were created.
        output_files = [
            config.TRAIN_CONFUSION_MATRIX_FILE,
            config.TEST_CONFUSION_MATRIX_FILE,
            config.TRAIN_MODEL_PERFORMANCE_FILE,
            config.TEST_MODEL_PERFORMANCE_FILE,
            config.TRAINED_MODEL_PATH,
        ]

        directories = [config.ROOT_DIR]
        for output_file in output_files:
            parent_dir = os.path.dirname(output_file)
            # dirname() is "" for a bare file name (file in the working directory);
            # there is nothing to create in that case. Also skip duplicates.
            if parent_dir and parent_dir not in directories:
                directories.append(parent_dir)

        create_directories(directories)

        return ModelTrainerConfig(
            root_dir=config.ROOT_DIR,
            train_npy_file=transformation.TRAIN_NPY_FILE,
            test_npy_file=transformation.TEST_NPY_FILE,
            trained_model_path=config.TRAINED_MODEL_PATH,
            train_confusion_matrix_file=config.TRAIN_CONFUSION_MATRIX_FILE,
            train_model_performance_file=config.TRAIN_MODEL_PERFORMANCE_FILE,
            test_confusion_matrix_file=config.TEST_CONFUSION_MATRIX_FILE,
            test_model_performance_file=config.TEST_MODEL_PERFORMANCE_FILE,
            expected_accuracy_threshold=self.params.EXPECTED_ACCURACY_THRESHOLD,
            overfit_underfit_threshold=self.params.OVERFIT_UNDERFIT_THRESHOLD,
            preprocessor_file=transformation.PREPROCESSOR_FILE,
        )

In [5]:
import sys
import pandas as pd
from sensorFaultDetection. logger import logging
from sensorFaultDetection.exception import CustomException
from sensorFaultDetection.utils import load_numpy_array, confusion_matrix_display, classifier_performance_report, save_pickle, load_pickle
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier

In [6]:
class SensorModel:
    """Bundles a preprocessing object with a trained model for inference.

    ``predict`` runs the input through ``preprocessing_object.transform`` and
    then through ``trained_model_object.predict``, so callers can pass
    untransformed data.
    """

    def __init__(self, preprocessing_object: Pipeline, trained_model_object: object):
        """Store the two collaborators.

        Args:
            preprocessing_object: object exposing ``transform``.
            trained_model_object: object exposing ``predict``.
        """
        self.preprocessing_object = preprocessing_object
        self.trained_model_object = trained_model_object

    def predict(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Transform ``dataframe`` and return the wrapped model's predictions.

        Args:
            dataframe: raw input passed to the preprocessing object.

        Returns:
            Whatever ``trained_model_object.predict`` returns.
            NOTE(review): the annotation says DataFrame, but the value is passed
            through unchanged -- confirm the actual type for the wrapped model.

        Raises:
            CustomException: wraps any error raised while transforming or predicting.
        """
        logging.info("Entered predict method of SensorModel class")

        try:
            logging.info("Using the trained model to get predictions")

            transformed_feature = self.preprocessing_object.transform(dataframe)
            predictions = self.trained_model_object.predict(transformed_feature)

            # Logged only after prediction really finished, so the log never
            # reports success for a call that then fails.
            logging.info("Used the trained model to get predictions")

            return predictions

        except Exception as e:
            raise CustomException(e, sys) from e

    def __repr__(self):
        """Show the wrapped model's class name, e.g. ``XGBClassifier()``."""
        return f"{type(self.trained_model_object).__name__}()"

    # str() intentionally renders the same text as repr().
    __str__ = __repr__


class ModelTrainer:
    """Trains an XGBoost classifier, evaluates it and persists it as a SensorModel."""

    def __init__(self, config: ModelTrainerConfig):
        """Keep the stage settings (input arrays, output paths, thresholds)."""
        self.config = config

    def perform_hyper_parameter_tuning(self):
        """Placeholder: hyper-parameter tuning is not implemented yet."""
        pass

    def train_model(self, X_train, y_train):
        """Fit an ``XGBClassifier`` with default parameters.

        Args:
            X_train: training features.
            y_train: training targets.

        Returns:
            The fitted ``XGBClassifier``.

        Raises:
            CustomException: wraps any error raised while fitting.
        """
        try:
            xgb_classifier = XGBClassifier()
            xgb_classifier.fit(X_train, y_train)
            return xgb_classifier

        except Exception as e:
            raise CustomException(e, sys) from e

    def _evaluate_split(self, split_name, y_true, y_pred,
                        confusion_matrix_file, performance_file, labels):
        """Write the confusion matrix and performance report for one data split.

        Returns the table produced by ``classifier_performance_report``.
        """
        confusion_matrix_display(
            y_true=y_true,
            y_pred=y_pred,
            path=confusion_matrix_file,
            classes=labels,
        )
        metric_table = classifier_performance_report(
            y_true=y_true,
            y_pred=y_pred,
            path=performance_file,
            classes=labels,
        )
        logging.info(f"Model performance metrics for {split_name} data is completed and stored at {performance_file}!")
        return metric_table

    @staticmethod
    def _summary_f1(metric_table):
        """Return the last entry of the report's 'f1-score' column.

        NOTE(review): presumably the last row is an aggregate (e.g. an average)
        rather than a per-class score -- confirm against
        ``classifier_performance_report``.
        """
        return metric_table['f1-score'].values[-1]

    def initiate_model_trainer(self):
        """Run the full training stage.

        Steps: load the train/test arrays, fit the model, write evaluation
        artifacts for both splits, apply the two quality gates and, if both
        pass, pickle a ``SensorModel`` (preprocessor + model) to
        ``config.trained_model_path``.

        Raises:
            CustomException: wraps any failure, including a rejected model
                (score below the expected threshold, or train/test gap above
                the overfit/underfit threshold).
        """
        try:
            # The last column of each array is the target, the rest are features.
            train_arr = load_numpy_array(self.config.train_npy_file)
            test_arr = load_numpy_array(self.config.test_npy_file)

            X_train, y_train = train_arr[:, :-1], train_arr[:, -1]
            X_test, y_test = test_arr[:, :-1], test_arr[:, -1]

            model = self.train_model(X_train, y_train)
            logging.info("Training model is completed successfully!")

            y_train_pred = model.predict(X_train)
            y_test_pred = model.predict(X_test)
            labels = ["Negative", "Positive"]

            train_metric_table = self._evaluate_split(
                "train", y_train, y_train_pred,
                self.config.train_confusion_matrix_file,
                self.config.train_model_performance_file,
                labels,
            )
            test_metric_table = self._evaluate_split(
                "test", y_test, y_test_pred,
                self.config.test_confusion_matrix_file,
                self.config.test_model_performance_file,
                labels,
            )

            train_f1 = self._summary_f1(train_metric_table)
            test_f1 = self._summary_f1(test_metric_table)

            # Gate 1: minimum quality.
            # NOTE(review): this compares the *train* score with the threshold;
            # confirm that the test score is not what was intended.
            if train_f1 < self.config.expected_accuracy_threshold:
                raise Exception("Trained model is not good to provide expected accuracy!")

            # Gate 2: overfitting / underfitting -- reject the model when the
            # train and test f1-scores differ by more than the allowed gap.
            diff = abs(train_f1 - test_f1)
            if diff > self.config.overfit_underfit_threshold:
                raise Exception("Model is not good, try to do more investigation")

            # Persist preprocessor and model together so inference can start
            # from untransformed data.
            preprocessor = load_pickle(self.config.preprocessor_file)
            sensor_model = SensorModel(preprocessing_object=preprocessor, trained_model_object=model)

            save_pickle(path=self.config.trained_model_path, obj=sensor_model)

        except Exception as e:
            raise CustomException(e, sys) from e

In [7]:
import sys
from sensorFaultDetection.exception import CustomException

In [8]:
# Run the model-training stage: build the configuration, then train, evaluate
# and persist the model.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.initiate_model_trainer()
except Exception as e:
    # The exception used to be constructed but never raised, so a failed run
    # finished silently as if it had succeeded. Raise it so the cell fails visibly.
    raise CustomException(e, sys) from e