In [1]:
# Move the working directory one level up so that later relative paths are
# resolved from there (presumably the project root -- confirm).
# NOTE(review): the chdir is relative, so re-running this cell climbs one more
# level each time; run it exactly once per kernel session.
import os
os.chdir('../')
# Show the resulting working directory as a sanity check.
%pwd

'/home/paladin/Downloads/Sensor-Fault-Detection'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable bundle of settings for the data-transformation stage.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # Working directory of the transformation stage.
    root_dir: Path
    # CSV inputs for the train and test splits.
    train_data_file: Path
    test_data_file: Path
    # Destinations for the transformed train and test arrays.
    train_npy_file: Path
    test_npy_file: Path
    # Name of the label column inside the CSV files.
    target_column: str
    # Destination for the pickled preprocessing object.
    preprocessor_file: Path

In [3]:
from sensorFaultDetection.constants import *
from sensorFaultDetection.utils import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        secret_filepath=SECRET_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Read the four YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            secret_filepath: Path of the secrets YAML.
            schema_filepath: Path of the schema YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.secret = read_yaml(secret_filepath)
        self.schema = read_yaml(schema_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Create the stage directory and return the data-transformation settings.

        Input files come from the ``data_validation`` section, output
        locations from the ``data_transformation`` section, and the target
        column name from the params file.
        """
        transformation_section = self.config.data_transformation

        # The stage directory is created before any other setting is read.
        create_directories([transformation_section.ROOT_DIR])

        return DataTransformationConfig(
            root_dir=transformation_section.ROOT_DIR,
            train_data_file=self.config.data_validation.VALID_TRAIN_FILE,
            test_data_file=self.config.data_validation.VALID_TEST_FILE,
            train_npy_file=transformation_section.TRAIN_NPY_FILE,
            test_npy_file=transformation_section.TEST_NPY_FILE,
            target_column=self.params.TARGET_COLUMN,
            preprocessor_file=transformation_section.PREPROCESSOR_FILE,
        )

In [5]:
import sys
import pandas as pd
import numpy as np
from sensorFaultDetection.utils import save_pickle, save_numpy_array
from sensorFaultDetection.logger import logging
from sklearn.preprocessing import RobustScaler
from imblearn.combine import SMOTETomek
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sensorFaultDetection.exception import CustomException


In [6]:

class TargetValueMapping:
    """Integer encoding of the target labels: ``neg`` -> 0 and ``pos`` -> 1."""

    def __init__(self):
        self.neg: int = 0
        self.pos: int = 1

    def to_dict(self):
        """Return the label -> code mapping (the instance's own attribute dict)."""
        return vars(self)

    def reverse_mapping(self):
        """Return the inverse mapping, code -> label."""
        return {code: label for label, code in self.to_dict().items()}

class DataTransformation:
    """Turns the validated train/test CSV files into model-ready NumPy arrays.

    The stage imputes and scales the input features, encodes the target
    labels as integers, balances the training classes and persists the
    resulting arrays together with the fitted preprocessing pipeline.
    """

    def __init__(self, config: DataTransformationConfig):
        self.config = config

    @staticmethod
    def read_data(file_path) -> pd.DataFrame:
        """Load a CSV file into a DataFrame.

        Raises:
            CustomException: Wraps any error raised while reading the file.
        """
        try:
            return pd.read_csv(file_path)
        except Exception as e:
            raise CustomException(e, sys) from e

    @classmethod
    def get_data_transformer_object(cls) -> Pipeline:
        """Build the (unfitted) preprocessing pipeline.

        Returns:
            A pipeline that replaces missing values with zero and then
            applies ``RobustScaler``.

        Raises:
            CustomException: Wraps any error raised while building the pipeline.
        """
        try:
            pipeline = Pipeline(
                steps=[
                    # Replace missing values with zero.
                    ('Imputer', SimpleImputer(strategy='constant', fill_value=0)),
                    # Bring features to a comparable scale while limiting the
                    # influence of outliers.
                    ('RobustScaler', RobustScaler()),
                ]
            )

            logging.info('Data transformer pipeline is created!')
            return pipeline

        except Exception as e:
            raise CustomException(e, sys) from e

    def _split_features_and_target(self, df: pd.DataFrame):
        """Split a frame into input features and the integer-encoded target."""
        target_column = self.config.target_column
        features = df.drop(columns=target_column)
        # Label values that match a TargetValueMapping key become its integer code.
        target = df[target_column].replace(TargetValueMapping().to_dict())
        return features, target

    def initiate_data_transformation(self, random_state: int = 42):
        """Run the transformation stage and persist its artifacts.

        Steps:
            1. Read the train and test CSV files named in the config.
            2. Fit the preprocessing pipeline on the training features and
               apply it to both splits.
            3. Balance the *training* split with ``SMOTETomek``.
            4. Save the train/test arrays (features followed by the target
               column) and the fitted preprocessor.

        The test split is deliberately not resampled: adding synthetic rows
        to it, or removing rows from it, would make evaluation metrics
        unrepresentative of real data.

        Args:
            random_state: Seed for the resampler so repeated runs produce the
                same training array. Pass ``None`` for non-deterministic
                resampling.

        Raises:
            CustomException: Wraps any error raised during the stage.
        """
        try:
            train_df = self.read_data(self.config.train_data_file)
            logging.info(f"Train data is read from {self.config.train_data_file}!")
            test_df = self.read_data(self.config.test_data_file)
            logging.info(f"Test data is read from {self.config.test_data_file}!")

            input_feature_train_df, target_feature_train_df = self._split_features_and_target(train_df)
            input_feature_test_df, target_feature_test_df = self._split_features_and_target(test_df)

            logging.info("Applying preprocessing object on both train and test dataframes")
            preprocessor_obj = self.get_data_transformer_object()

            # Fit on the training split only; the test split reuses the fitted statistics.
            transformed_input_train_feature = preprocessor_obj.fit_transform(input_feature_train_df)
            transformed_input_test_feature = preprocessor_obj.transform(input_feature_test_df)

            smt = SMOTETomek(sampling_strategy='minority', random_state=random_state)
            logging.info("Applying data balancer object on train data only")

            input_feature_train_final, target_feature_train_final = smt.fit_resample(
                transformed_input_train_feature, target_feature_train_df
            )

            # The target is appended as the last column of each array.
            train_arr = np.c_[input_feature_train_final, np.array(target_feature_train_final)]
            test_arr = np.c_[transformed_input_test_feature, np.array(target_feature_test_df)]

            save_numpy_array(path=self.config.train_npy_file, array=train_arr)
            save_numpy_array(path=self.config.test_npy_file, array=test_arr)
            save_pickle(path=self.config.preprocessor_file, obj=preprocessor_obj)

        except Exception as e:
            raise CustomException(e, sys) from e

        

In [7]:
import sys
from sensorFaultDetection.exception import CustomException

In [8]:
# Run the data-transformation stage end to end.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.initiate_data_transformation()
except Exception as e:
    # Raise the wrapped error: merely constructing the exception object would
    # discard it and let the cell finish as if the stage had succeeded.
    raise CustomException(e, sys) from e