In [1]:

# Move the process working directory up one level before the project imports
# in the following cell run.
# NOTE(review): presumably the notebook lives in a sub-folder and this makes the
# `src` package and the relative config paths resolve from the project root —
# confirm.
# NOTE: the path is relative, so re-running this cell climbs one more level
# each time it is executed.
import os
os.chdir('../')

In [8]:
from dataclasses import dataclass
from src.logger.custom_logging import logger
from pathlib import Path
from src.constants import *
from src.utils.utlis import *
from sklearn.impute import SimpleImputer
from src.exceptions.expection import CustomException
import pandas as pd
from sklearn.model_selection import train_test_split
import sys
import numpy as np
from sklearn.preprocessing import RobustScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE

In [6]:
@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable bundle of filesystem locations for the transformation stage.

    Field order is part of the constructor signature and must not change.
    """

    # Directory handed to ``create_directories`` before the stage runs.
    root_dir: Path
    # Destination passed to ``save_obj`` for the fitted preprocessor.
    preprocessor_obj: Path
    # CSV inputs read with ``pd.read_csv``.
    train_file_path: Path
    test_file_path: Path
    # CSV outputs written with ``DataFrame.to_csv``.
    save_train_path: Path
    save_test_path: Path

In [7]:
class ConfigManager:
    """Load the project configuration and hand out per-stage config objects.

    Both files are parsed with ``read_yaml``; the parsed objects are kept on
    ``self.config`` and ``self.params``.
    """

    def __init__(self, config_file=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and register the artifacts root directory.

        Args:
            config_file: Location of the main configuration file.
            params_filepath: Location of the parameters file.
        """
        self.config = read_yaml(config_file)
        self.params = read_yaml(params_filepath)

        # presumably creates the directory if it is missing — see create_directories
        artifacts_dir = self.config.artifacts_root
        create_directories([artifacts_dir])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the config object for the data-transformation stage.

        The stage's ``root_dir`` is passed to ``create_directories`` before the
        config object is assembled.

        Returns:
            A ``DataTransformationConfig`` populated from the
            ``data_transformation`` section of the loaded configuration.
        """
        section = self.config.data_transformation
        create_directories([section.root_dir])

        # Same attributes, in the same order, as the dataclass fields.
        field_names = (
            "root_dir",
            "preprocessor_obj",
            "train_file_path",
            "test_file_path",
            "save_train_path",
            "save_test_path",
        )
        values = {name: getattr(section, name) for name in field_names}
        return DataTransformationConfig(**values)

In [9]:
class DataTransformation:
    """Scale the features, rebalance the training set and persist the results.

    All input and output locations come from the ``DataTransformationConfig``
    given at construction time.
    """

    def __init__(self, config: DataTransformationConfig):
        """Store the stage configuration.

        Args:
            config: Paths of the input CSVs, the output CSVs and the
                preprocessor artifact.
        """
        self.config = config

    def create_preprocessor(self):
        """Build the (unfitted) column transformer applied to the features.

        Returns:
            A ``ColumnTransformer`` that applies ``RobustScaler`` to the
            ``Amount`` and ``Time`` columns.

        Raises:
            CustomException: If building the transformer fails.
        """
        try:
            logger.info('Creating data transformation pipeline')

            col_to_transform = ['Amount', 'Time']

            # NOTE(review): ColumnTransformer defaults to remainder='drop', so
            # every feature column other than the two listed above is discarded
            # from the transformed output. Confirm this is intended; if the
            # data set carries further features that the model should see,
            # pass remainder='passthrough' here.
            preprocessor = ColumnTransformer(
                transformers=[
                    ('num', RobustScaler(), col_to_transform),
                ]
            )

            return preprocessor

        except Exception as e:
            logger.error(f"Error in creating data transformation pipeline: {str(e)}")
            raise CustomException(e, sys) from e

    def transform_data(self):
        """Transform the train/test CSVs and write the processed artifacts.

        Steps:
            1. Read the train and test CSVs named in the config.
            2. Separate the ``Class`` target from the remaining columns.
            3. Fit the preprocessor on the training features only and apply
               it to both splits.
            4. Oversample the training split with SMOTE (the test split is
               left untouched).
            5. Save the fitted preprocessor and both processed splits, with
               the target appended as the last column.

        Raises:
            CustomException: If any step fails.
        """
        train_path = self.config.train_file_path
        test_path = self.config.test_file_path

        try:
            train_data = pd.read_csv(train_path)
            test_data = pd.read_csv(test_path)

            target_column = 'Class'
            drop_columns = [target_column]

            preprocessor = self.create_preprocessor()
            input_feature_train_data = train_data.drop(columns=drop_columns)
            target_feature_train_data = train_data[target_column]
            input_feature_test_data = test_data.drop(columns=drop_columns)
            target_feature_test_data = test_data[target_column]

            # Fit on train only so no test-set statistics leak into the scaler.
            input_train_arr = preprocessor.fit_transform(input_feature_train_data)
            input_test_arr = preprocessor.transform(input_feature_test_data)

            # Apply SMOTE for class balancing
            smote = SMOTE(random_state=42)
            input_train_resampled, target_train_resampled = smote.fit_resample(
                input_train_arr, target_feature_train_data
            )

            # np.asarray accepts both a pandas Series and a plain ndarray, so
            # this does not depend on which of the two fit_resample returns.
            train_target_col = np.asarray(target_train_resampled).reshape(-1, 1)
            test_target_col = np.asarray(target_feature_test_data).reshape(-1, 1)

            train_array = np.c_[input_train_resampled, train_target_col]
            test_array = np.c_[input_test_arr, test_target_col]

            save_obj(file_path=self.config.preprocessor_obj, obj=preprocessor)

            # The frames are built from bare arrays, so the CSV header row holds
            # positional integers (0, 1, ...) rather than the original names.
            train_df = pd.DataFrame(train_array)
            test_df = pd.DataFrame(test_array)

            train_df.to_csv(self.config.save_train_path, index=False, header=True)
            test_df.to_csv(self.config.save_test_path, index=False, header=True)

        except CustomException:
            # Already wrapped (e.g. by create_preprocessor); wrapping it again
            # would nest one CustomException inside another.
            raise
        except Exception as e:
            logger.error(f"Error in data transformation: {str(e)}")
            raise CustomException(e, sys) from e