In [1]:
import os 
# Move the working directory up one level so the relative paths used later in
# this notebook (config files, the artifacts folder) resolve from the parent
# directory -- presumably the project root; confirm against the repo layout.
# NOTE(review): a relative chdir is not idempotent; re-running this cell moves
# up one more directory each time, so run it exactly once per kernel session.
os.chdir("../")


In [2]:
# Display the current working directory to confirm the chdir in the first cell
# landed where expected.
%pwd

'c:\\Users\\HP\\OneDrive\\Desktop\\pishing'

In [3]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable set of filesystem locations for the data-transformation stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    Field types are annotations only; the dataclass does not convert or
    validate the values it is given.
    """

    # Directory for this stage; created by ConfigurationManager before use.
    root_dir: Path
    # CSV destination for the scaled training features.
    train_path: Path
    # CSV destination for the scaled test features.
    test_path: Path
    # CSV destination for the training target column.
    train_y_path: Path
    # CSV destination for the test target column.
    test_y_path: Path
    



In [4]:
from phishingdetection.constants import *
from phishingdetection.utils.common import read_yaml, create_directories
from sklearn.preprocessing import MinMaxScaler 
from phishingdetection.logging import logger

In [5]:
class ConfigurationManager:
    """Load the project's YAML settings and build per-stage config objects.

    Constructing an instance parses the config and params files with
    ``read_yaml`` and creates the directory named by the config's
    ``artifacts_root`` entry via ``create_directories``.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main config YAML; defaults to
                ``CONFIG_FILE_PATH``.
            params_filepath: Location of the params YAML; defaults to
                ``PARAMS_FILE_PATH``.
        """
        self.config = read_yaml(config_filepath)
        # Loaded for parity with the config file; not read anywhere in this class.
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the data-transformation config from the ``data_transformation`` section.

        The stage's ``root_dir`` is created as a side effect. Every location is
        wrapped in ``Path`` so the returned object matches the field types
        declared on ``DataTransformationConfig`` instead of carrying the raw
        values read from YAML.

        Returns:
            A ``DataTransformationConfig`` populated from the YAML section.
        """
        section = self.config.data_transformation

        # The helper receives the raw config value, exactly as before.
        create_directories([section.root_dir])

        return DataTransformationConfig(
            root_dir=Path(section.root_dir),
            train_path=Path(section.train_path),
            test_path=Path(section.test_path),
            train_y_path=Path(section.train_y_path),
            test_y_path=Path(section.test_y_path),
        )


In [6]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import logging
import pickle

class DataTransformation:
    """Split the ingested dataset, min-max scale its features and persist the results.

    Scaled feature frames and the untouched target columns are written as CSV
    files to the locations held by the supplied ``DataTransformationConfig``.
    The fitted scaler is pickled to ``SCALER_PATH`` in the current working
    directory.
    """

    # Portable spelling of the ingestion output; the earlier backslash literal
    # only resolved on Windows.
    DEFAULT_DATA_PATH = Path("artifacts") / "data_ingestion" / "train.csv"
    # Name of the label column separated from the features.
    TARGET_COLUMN = "phishing"
    # Relative to the working directory, as before, so existing consumers of
    # this file keep finding it in the same place.
    SCALER_PATH = "scalers.pkl"

    def __init__(self, config:DataTransformationConfig):
        """Store the stage configuration that names the output CSV locations."""
        self.config = config

    def transform(self, data_path=None, test_size=0.2, random_state=42):
        """Run the split / scale / save pipeline.

        Args:
            data_path: CSV to read. ``None`` (the default) uses
                ``DEFAULT_DATA_PATH``.
            test_size: Fraction of rows held out as the test split.
            random_state: Seed forwarded to ``train_test_split`` so repeated
                runs give the same split. Pass ``None`` for an unseeded split.

        Raises:
            KeyError: If the target column is absent from the dataset.
        """
        source = self.DEFAULT_DATA_PATH if data_path is None else Path(data_path)
        data = pd.read_csv(source)

        target = self.TARGET_COLUMN
        if target not in data.columns:
            raise KeyError(f"Target column {target!r} not found in {source}")

        train_df, test_df = train_test_split(
            data, test_size=test_size, random_state=random_state
        )
        logging.info("Read train and test data completed")

        train_features = train_df.drop(columns=[target])
        train_target = train_df[target]
        test_features = test_df.drop(columns=[target])
        test_target = test_df[target]

        # Fit on the training split only, then apply the same scaling to both
        # splits. Raw arrays are passed (as in the original), not DataFrames.
        scaler = MinMaxScaler()
        scaler.fit(train_features.values)
        scaled_train = scaler.transform(train_features.values)
        scaled_test = scaler.transform(test_features.values)
        logging.info("Applying preprocessing object on training dataframe and testing dataframe.")

        with open(self.SCALER_PATH, 'wb') as f:
            pickle.dump(scaler, f)

        # Rebuild frames so the CSV files keep the original column names.
        scaled_train_df = pd.DataFrame(scaled_train, columns=train_features.columns)
        scaled_test_df = pd.DataFrame(scaled_test, columns=test_features.columns)

        # index=False everywhere: feature and target files stay aligned by row
        # order, not by the shuffled index left over from the split.
        scaled_train_df.to_csv(self.config.train_path, index=False)
        scaled_test_df.to_csv(self.config.test_path, index=False)
        train_target.to_csv(self.config.train_y_path, index=False)
        test_target.to_csv(self.config.test_y_path, index=False)

        # Logged once, after every output has actually been written.
        logging.info("Transformation completed and saved scaler object.")

In [7]:
# Run the data-transformation stage end to end: load the configuration, build
# the stage config, then split, scale and save the dataset.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.transform()
except Exception:
    # Record the failure with its traceback, then re-raise with a bare `raise`
    # so the original traceback is propagated unchanged.
    logging.exception("Data transformation stage failed")
    raise

[2024-04-21 17:07:41,729: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-04-21 17:07:41,733: INFO: common: yaml file: params.yaml loaded successfully]
[2024-04-21 17:07:41,735: INFO: common: created directory at: artifacts]
[2024-04-21 17:07:41,737: INFO: common: created directory at: artifacts/data_transformation]
[2024-04-21 17:07:41,908: INFO: 1644176505: Read train and test data completed]
[2024-04-21 17:07:41,952: INFO: 1644176505: Applying preprocessing object on training dataframe and testing dataframe.]
[2024-04-21 17:07:41,956: INFO: 1644176505: Transformation completed and saved scaler object.]
[2024-04-21 17:07:43,528: INFO: 1644176505: Transformation completed and saved scaler object.]
