In [None]:
from dataclasses import dataclass 
import os 
from datetime import datetime 
import pandas as pd 
import numpy as np 


@dataclass
class Data_Ingestion_Artifact:
    """File locations handed from data ingestion to the later pipeline stages."""

    # CSV source that Data_Transformation reads as the training split.
    train_file_path: str
    # CSV source that Data_Transformation reads as the test split.
    test_file_path: str

@dataclass
class Data_validation_Artifact:
    """Outcome of the data-validation stage."""

    # Data_Transformation only transforms the data when this is truthy.
    validation_status: bool
    # Free-text message accompanying the status.
    message: str
    # Location of the drift report file.
    drift_report_file_path: str

@dataclass
class DataTransformationArtifact:
    """File locations produced by the data-transformation stage."""

    # Where the fitted preprocessor object was saved.
    transformed_object_file_path: str
    # Where the transformed training array was saved.
    transformed_train_file_path: str
    # Where the transformed test array was saved.
    transformed_test_file_path: str

# Artifacts
ARTIFACTS = "artifacts"
PIPELINE_DIR = "network"


@dataclass
class NS_Train_Configeration:
    """Top-level training configuration: artifact root and pipeline directory names."""

    artifact_dir: str = ARTIFACTS
    pipeline_dir: str = PIPELINE_DIR
    # No annotation, so this is a plain class attribute rather than a dataclass
    # field: it is computed once, when the class body runs, and is not part of
    # __init__, __repr__ or __eq__.
    TIMESTAMP = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")


# Shared configuration instance used by the stage-specific config classes.
train_config = NS_Train_Configeration()

In [None]:
# Directory / file-name fragments for the data-transformation outputs.
DATA_TRANSFORMATION_DIR: str = "data_transformation"
DATA_TRANSFORMATION_TRANSFORM_FILE: str = "transform"
DATA_TRANSFORMATION_OBJECT_DIR: str = "transform_object"

# data
RAW_DATA = "raw.csv"
TRAIN_DATA = "train.csv"
TEST_DATA = "test.csv"
PREPROCESSOR_FILE = "preprocessor.pkl"
# NOTE(review): looks like a placeholder — set this to the real target column name.
TARGET_COLUMN = "------------------------------------------"
# Captured once, when this cell runs; used to derive the 'company_age' feature.
CURRENT_DATE = datetime.now()

class Data_Transformation_config:
    """Output locations for the data-transformation stage.

    Every path is a class attribute computed when the class body runs, rooted
    at ``train_config.artifact_dir`` joined with ``DATA_TRANSFORMATION_DIR``.
    """

    # Root directory of this stage's outputs.
    data_transformation_dir = os.path.join(
        train_config.artifact_dir,
        DATA_TRANSFORMATION_DIR,
    )
    # Transformed arrays reuse the CSV file names with 'csv' replaced by 'npy'.
    data_transformation_train_file = os.path.join(
        data_transformation_dir,
        DATA_TRANSFORMATION_TRANSFORM_FILE,
        TRAIN_DATA.replace('csv', 'npy'),
    )
    data_transformation_test_file = os.path.join(
        data_transformation_dir,
        DATA_TRANSFORMATION_TRANSFORM_FILE,
        TEST_DATA.replace('csv', 'npy'),
    )
    # Location of the serialized preprocessor object.
    data_transformation_object = os.path.join(
        data_transformation_dir,
        DATA_TRANSFORMATION_OBJECT_DIR,
        PREPROCESSOR_FILE,
    )
    

In [None]:
import os
import yaml

def read_yaml_file(file_path):
    """Load a YAML file and return its parsed content.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's content.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    with open(file_path, 'rb') as file:
        # Return the parsed document so callers can index into it.
        return yaml.safe_load(file)

def read_data(df: str) -> pd.DataFrame:
    """Read a CSV source into a DataFrame.

    Args:
        df: Despite its name, this is the CSV source handed to ``pd.read_csv``
            (a file path or file-like buffer), not a DataFrame. The name is
            kept so existing keyword callers keep working.

    Returns:
        The DataFrame parsed by ``pd.read_csv``.
    """
    return pd.read_csv(df)

def drop_col(df: pd.DataFrame, col):
    """Drop one or more columns from ``df`` and return the frame.

    Args:
        df: Frame to modify. The columns are removed in place, so callers that
            rely on the mutation keep working.
        col: A single column label, or a list of column labels.

    Returns:
        ``df`` itself, with the requested columns removed.

    Raises:
        KeyError: Raised by pandas when a requested column is not present.
    """
    columns = list(col) if isinstance(col, list) else [col]
    # drop(..., inplace=True) returns None, so hand the frame back explicitly.
    df.drop(columns=columns, inplace=True)
    return df

class TargetValueMapping:
    """Integer codes for the target labels: ``male`` -> 0, ``female`` -> 1."""

    def __init__(self):
        self.male, self.female = 0, 1

    def _asdict(self):
        """Return the label -> code mapping (the instance's own attribute dict)."""
        return vars(self)

    def reverse(self):
        """Return the inverse mapping, code -> label."""
        return {code: label for label, code in self._asdict().items()}
    
def save_numpy_array(file_path: str, array: np.ndarray):
    """Save a NumPy array to ``file_path`` with ``np.save``.

    Missing parent directories are created first.

    Args:
        file_path: Destination file. May be a bare file name with no
            directory component.
        array: Array to write.
    """
    dir_path = os.path.dirname(file_path)
    # A bare file name has an empty directory part, and os.makedirs('')
    # raises FileNotFoundError.
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)
    with open(file_path, 'wb') as file_obj:
        np.save(file_obj, array)

def save_object(file_path: str, obj):
    """Serialize ``obj`` to ``file_path``.

    Missing parent directories are created first. ``dill`` is used when it is
    installed; otherwise the standard-library ``pickle`` is used, whose
    ``dump`` has the same ``(obj, file)`` signature.

    Args:
        file_path: Destination file. May be a bare file name with no
            directory component.
        obj: Object to serialize.
    """
    # Imported here because no cell imports dill at module level.
    try:
        import dill as serializer
    except ImportError:
        import pickle as serializer

    dir_path = os.path.dirname(file_path)
    # os.makedirs('') raises FileNotFoundError for a bare file name.
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)
    with open(file_path, 'wb') as file_obj:
        # dump() takes the object first and the open file second.
        serializer.dump(obj, file_obj)
    


In [None]:
from Network_Security.constant import SEHEMA_FILE_PATH
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder,StandardScaler,PowerTransformer
from sklearn.compose import ColumnTransformer

class Data_Transformation:
    """Build the preprocessing pipeline and write transformed train/test arrays.

    The column groups used by the preprocessor, and the columns to drop, are
    read from the schema YAML at ``SEHEMA_FILE_PATH``.
    """

    def __init__(self,data_ingestion_artifact=Data_Ingestion_Artifact,
                 data_validation_artifact=Data_validation_Artifact,
                 data_transformation_config=Data_Transformation_config):
        """Store the upstream artifacts and config, and load the schema.

        Args:
            data_ingestion_artifact: Provides ``train_file_path`` and
                ``test_file_path``.
            data_validation_artifact: Provides ``validation_status`` and
                ``message``.
            data_transformation_config: Provides the output paths.

        NOTE(review): the defaults are the classes themselves, not instances.
        The two artifact classes carry no field values at class level, so real
        artifact instances must be passed for ``init_data_transformation`` to
        work. The defaults are kept only to preserve the signature.
        """
        self.data_ingestion_artifact = data_ingestion_artifact
        self.data_validation_artifact = data_validation_artifact
        self.data_transformation_config = data_transformation_config
        self._sehema = read_yaml_file(file_path=SEHEMA_FILE_PATH)

    def get_data_transformation(self):
        """Return an unfitted ``ColumnTransformer`` built from the schema.

        The schema keys ``ohe_columns``, ``or_columns``, ``transform_columns``
        and ``numerical_columns`` select the columns for one-hot encoding,
        ordinal encoding, Yeo-Johnson power transform and standard scaling.
        """
        ohe_transform = OneHotEncoder()
        or_transform = OrdinalEncoder()
        scaler = StandardScaler()
        pw_transform = PowerTransformer(method='yeo-johnson')

        ohe_col = self._sehema['ohe_columns']
        or_col = self._sehema['or_columns']
        num_col = self._sehema['numerical_columns']
        power_tf_col = self._sehema['transform_columns']

        preprocessor = ColumnTransformer([
            ('OneHotEncoder', ohe_transform, ohe_col),
            ('OrdinalEncoder', or_transform, or_col),
            ('PowerTransformer', pw_transform, power_tf_col),
            ('StandardScaler', scaler, num_col),
        ])
        return preprocessor

    def _split_features_target(self, df):
        """Return ``(features, target)`` for one split, ready for the preprocessor."""
        # DataFrame.drop returns a new frame, so ``df`` itself is left untouched.
        features = df.drop(columns=[TARGET_COLUMN])
        target = df[TARGET_COLUMN].replace(TargetValueMapping()._asdict())

        # NOTE(review): subtracting from a datetime presumably requires
        # 'company_estabilish' to hold datetimes — confirm against the data.
        features['company_age'] = CURRENT_DATE - features['company_estabilish']
        # The schema value is passed straight to pandas, which accepts either
        # a single label or a list of labels.
        features = features.drop(columns=self._sehema['drop_col'])
        return features, target

    def init_data_transformation(self):
        """Transform the ingested splits and save the outputs.

        Fits the preprocessor on the training features, applies it to both
        splits, rebalances the training split with SMOTEENN, then saves the
        preprocessor and both arrays (features with the target as the last
        column) to the paths in the transformation config.

        Returns:
            DataTransformationArtifact: The paths that were written.

        Raises:
            ValueError: If the validation artifact reports a failed validation.
        """
        if not self.data_validation_artifact.validation_status:
            # Fail loudly instead of returning None to a caller that expects
            # an artifact.
            raise ValueError(
                f"Data validation failed: {self.data_validation_artifact.message}"
            )

        from imblearn.combine import SMOTEENN

        preprocessor = self.get_data_transformation()
        train_df = read_data(self.data_ingestion_artifact.train_file_path)
        test_df = read_data(self.data_ingestion_artifact.test_file_path)

        xtrain, ytrain = self._split_features_target(train_df)
        xtest, ytest = self._split_features_target(test_df)

        xtrain_arr = preprocessor.fit_transform(xtrain)
        xtest_arr = preprocessor.transform(xtest)

        # Rebalance the training split only: the test split must keep its
        # original rows so evaluation reflects the real class distribution.
        smt = SMOTEENN(sampling_strategy="minority")
        xtrain_arr, ytrain = smt.fit_resample(xtrain_arr, ytrain)

        train_arr = np.c_[xtrain_arr, np.array(ytrain)]
        test_arr = np.c_[xtest_arr, np.array(ytest)]

        config = self.data_transformation_config
        save_object(config.data_transformation_object, preprocessor)
        save_numpy_array(config.data_transformation_train_file, array=train_arr)
        # Each split goes to its own file.
        save_numpy_array(config.data_transformation_test_file, array=test_arr)

        return DataTransformationArtifact(
            transformed_object_file_path=config.data_transformation_object,
            transformed_train_file_path=config.data_transformation_train_file,
            transformed_test_file_path=config.data_transformation_test_file,
        )
                


In [None]:
class Training_Pipeline:
    """Drives the pipeline stages; only data transformation is enabled here."""

    def __init__(self):
        """Create the stage configuration(s) used by the pipeline."""
    #   self.data_ingestion_config = Data_ingestion_Config()
    #   self.validation_config = Data_validation_config()
        self.data_transformation_config = Data_Transformation_config()

    # def start_data_ingestion(self)->Data_Ingestion_Artifact:
    #     data_ingestion = Data_Ingestion(ingestion_config=self.data_ingestion_config)
    #     data_ingestion_artifacet = data_ingestion.init_data_ingestion()
    #     return data_ingestion_artifacet

    # def start_data_validation(self,data_ingestion_artifacet:Data_Ingestion_Artifact)-> Data_validation_Artifact:
    #     data_valid = Data_validation(data_ingestion_artifacet=data_ingestion_artifacet,
    #                                   data_validation_config=self.data_validation_config)
    #     data_validation_Artifact = data_valid.init_data_ingestion()
    #     return data_validation_Artifact

    def start_data_transform(self,data_ingestion_artifacet:Data_Ingestion_Artifact,
                             data_validation_Artifact:Data_validation_Artifact)->DataTransformationArtifact:
        """Run the data-transformation stage and return its artifact.

        Args:
            data_ingestion_artifacet: Artifact holding the train/test file paths.
            data_validation_Artifact: Artifact holding the validation outcome.

        Returns:
            DataTransformationArtifact: Result of
            ``Data_Transformation.init_data_transformation``.
        """
        # The keyword names must match Data_Transformation.__init__ exactly.
        data_transformation = Data_Transformation(
            data_ingestion_artifact=data_ingestion_artifacet,
            data_validation_artifact=data_validation_Artifact,
            data_transformation_config=self.data_transformation_config,
        )
        return data_transformation.init_data_transformation()

In [None]:
def start_data_ingestion(self) -> DataIngestionArtifact:
    """Run the data-ingestion component and return its artifact.

    Written as a method (it takes ``self`` and reads
    ``self.data_ingestion_config``) but defined at module level in this cell.
    Any exception is re-raised as ``USvisaException``.

    NOTE(review): ``DataIngestionArtifact``, ``DataIngestion``, ``logging``,
    ``sys`` and ``USvisaException`` are not defined or imported in the visible
    cells — confirm where they are expected to come from.
    """
    try:
        logging.info("Entered the start_data_ingestion method of TrainPipeline class")
        logging.info("Getting the data from mongodb")
        ingestion = DataIngestion(data_ingestion_config=self.data_ingestion_config)
        artifact = ingestion.initiate_data_ingestion()
        logging.info("Got the train_set and test_set from mongodb")
        logging.info("Exited the start_data_ingestion method of TrainPipeline class")
    except Exception as e:
        raise USvisaException(e, sys) from e
    else:
        return artifact
        

    
def start_data_validation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataValidationArtifact:
    """Run the data-validation component and return its artifact.

    Written as a method (it takes ``self`` and reads
    ``self.data_validation_config``) but defined at module level in this cell.
    Any exception raised while validating is re-raised as ``USvisaException``.

    NOTE(review): ``DataIngestionArtifact``, ``DataValidationArtifact``,
    ``DataValidation``, ``logging``, ``sys`` and ``USvisaException`` are not
    defined or imported in the visible cells — confirm their origin.
    """
    logging.info("Entered the start_data_validation method of TrainPipeline class")
    try:
        validator = DataValidation(
            data_ingestion_artifact=data_ingestion_artifact,
            data_validation_config=self.data_validation_config,
        )
        validation_artifact = validator.initiate_data_validation()
        logging.info("Performed the data validation operation")
        logging.info("Exited the start_data_validation method of TrainPipeline class")
    except Exception as e:
        raise USvisaException(e, sys) from e
    return validation_artifact
        
    
def start_data_transformation(self, data_ingestion_artifact: DataIngestionArtifact, data_validation_artifact: DataValidationArtifact) -> DataTransformationArtifact:
    """Run the data-transformation component and return its artifact.

    Written as a method (it takes ``self`` and reads
    ``self.data_transformation_config``) but defined at module level in this
    cell. Any exception is re-raised as ``USvisaException``.

    NOTE(review): ``DataIngestionArtifact``, ``DataValidationArtifact``,
    ``DataTransformation``, ``sys`` and ``USvisaException`` are not defined or
    imported in the visible cells — confirm their origin.
    """
    try:
        data_transformation = DataTransformation(data_ingestion_artifact=data_ingestion_artifact,
                                                 data_transformation_config=self.data_transformation_config,
                                                 data_validation_artifact=data_validation_artifact)
        data_transformation_artifact = data_transformation.initiate_data_transformation()
        return data_transformation_artifact
    except Exception as e:
        # Chain explicitly, as the ingestion and validation starters do.
        raise USvisaException(e, sys) from e
        
    