In [1]:
import os
os.chdir('../')
%pwd

'/home/paladin/Downloads/Sensor-Fault-Detection'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings consumed by the data-transformation stage.

    The dataclass is frozen, so fields cannot be reassigned after construction.
    Annotations are not enforced at runtime; they document the intended types.
    """

    # Directory for this stage's outputs.
    root_dir: Path
    # CSV file loaded as the training split.
    train_data_file: Path
    # CSV file loaded as the test split.
    test_data_file: Path
    # Names of the columns selected as features (column names, not a path).
    numerical_columns: list
    # Names of the columns dropped from both splits right after loading.
    drop_columns: list
    # Destination file for the pickled, fitted preprocessing pipeline.
    preprocessor_file: Path

In [3]:
from sensorFaultDetection.constants import *
from sensorFaultDetection.utils import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 secret_filepath=SECRET_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH
                 ):
        """Read the four YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            secret_filepath: Location of the secrets YAML.
            schema_filepath: Location of the schema YAML.
            params_filepath: Location of the parameters YAML.

        Each parsed file is stored on the instance (``config``, ``secret``,
        ``schema``, ``params``). The parsed objects are accessed by attribute
        below, so ``read_yaml`` is assumed to return an attribute-style
        mapping -- TODO confirm against its implementation.
        """
        self.config = read_yaml(config_filepath)
        self.secret = read_yaml(secret_filepath)
        self.schema = read_yaml(schema_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the settings object for the data-transformation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            DataTransformationConfig: file locations wrapped in ``Path`` so
            they match the dataclass annotations, plus the feature and drop
            column names taken from the schema unchanged.
        """
        stage = self.config.data_transformation

        create_directories([stage.ROOT_DIR])

        # The train/test files are produced by the ingestion stage, so their
        # locations come from that stage's section of the configuration.
        ingestion = self.config.data_ingestion

        return DataTransformationConfig(
            root_dir=Path(stage.ROOT_DIR),
            train_data_file=Path(ingestion.TRAIN_DATA_FILE),
            test_data_file=Path(ingestion.TEST_DATA_FILE),
            numerical_columns=self.schema.numerical_columns,
            drop_columns=self.schema.drop_columns,
            preprocessor_file=Path(stage.PREPROCESSOR_FILE),
        )

In [5]:
import pandas as pd
import numpy as np
from sensorFaultDetection.utils import save_pickle
from sensorFaultDetection.logger import logging
from box import ConfigBox 
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

from sklearn.compose import ColumnTransformer, make_column_selector

In [6]:
class DataTransformation:
    """Fit a mean-impute + standard-scale pipeline on train data and apply it to both splits."""

    def __init__(self, config: DataTransformationConfig):
        """Keep the stage settings (file locations and column lists) for later use."""
        self.config = config

    def get_data_transformer_object(self):
        """Create the unfitted preprocessing pipeline.

        Returns:
            Pipeline: a two-step pipeline that replaces NaN values with the
            column mean and then standardizes with ``StandardScaler``.
        """
        pipeline = Pipeline(
            steps=[
                ('imputer', SimpleImputer(missing_values=np.nan, strategy='mean')),
                ('scaler', StandardScaler()),
            ]
        )

        logging.info('Data transformer pipeline is created!')

        return pipeline

    def _load_split(self, csv_path, target_feature):
        """Read one CSV split and return ``(features, target)``.

        The configured drop columns are removed first; features are then the
        configured numerical columns and the target is ``target_feature``.
        """
        # Non-mutating drop: `columns=` already implies the column axis, so
        # no `axis` argument or `inplace=True` is needed.
        frame = pd.read_csv(csv_path).drop(columns=self.config.drop_columns)
        return frame[self.config.numerical_columns], frame[target_feature]

    def initiate_data_transformation(self, target_feature='class'):
        """Load both splits, fit the preprocessor on train, and transform both.

        The pipeline is fitted on the training features only and then applied
        to the test features. The fitted pipeline is pickled to
        ``config.preprocessor_file``.

        Args:
            target_feature: Name of the label column. Defaults to ``'class'``.

        Returns:
            ConfigBox: with keys ``X_train`` and ``X_test`` (transformed
            features) and ``y_train`` and ``y_test`` (label values taken from
            the target column, not encoded).

        Raises:
            KeyError: raised by pandas if a configured column or the target
                column is missing from a split.
        """
        X_train, y_train = self._load_split(self.config.train_data_file, target_feature)
        X_test, y_test = self._load_split(self.config.test_data_file, target_feature)

        logging.info("Applying preprocessing object on both train and test dataframes")
        preprocessing_obj = self.get_data_transformer_object()

        # Fit on train only; test is transformed with the train statistics.
        X_train_arr = preprocessing_obj.fit_transform(X_train)
        X_test_arr = preprocessing_obj.transform(X_test)

        save_pickle(path=self.config.preprocessor_file, obj=preprocessing_obj)

        return ConfigBox({'X_train': X_train_arr,
                          'X_test': X_test_arr,
                          'y_train': y_train.values,
                          'y_test': y_test.values})

In [7]:
import sys
from sensorFaultDetection.exception import CustomException

In [8]:
# Run the data-transformation stage end to end: load settings, build the
# stage object, and produce the transformed train/test data.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    transformed_data = data_transformation.initiate_data_transformation()
except Exception as e:
    # The wrapper must be raised, not merely constructed. Otherwise the failure
    # is silently swallowed and `transformed_data` is left undefined for the
    # cells that follow. Chaining with `from e` keeps the original traceback.
    raise CustomException(e, sys) from e

In [9]:
# Display the test-split labels returned by initiate_data_transformation().
transformed_data.y_test


array(['neg', 'neg', 'neg', ..., 'neg', 'neg', 'neg'], dtype=object)