In [1]:
import os
%pwd

'd:\\ML-Projects\\04-Mobile-Price-Predictor\\research'

In [2]:
os.chdir("../")
%pwd

'd:\\ML-Projects\\04-Mobile-Price-Predictor'

In [3]:
import warnings
# Installs a blanket "ignore" filter: no category or module is specified, so
# every warning raised after this cell runs is hidden from the output.
warnings.filterwarnings("ignore")

In [4]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTransformationConfig:
    """Settings consumed by the data transformation stage.

    ``frozen=True`` makes instances immutable: assigning to a field after
    construction raises ``dataclasses.FrozenInstanceError``.

    NOTE(review): the fields are annotated as ``Path``, but
    ``ConfigurationManager`` passes the values straight from the loaded YAML
    config without wrapping them in ``Path()`` -- presumably they are plain
    strings at runtime; confirm before relying on ``Path`` methods.
    """
    # Directory into which the transformed CSV files are written.
    root_dir: Path
    # CSV file that is read as the training data.
    train_data_path: Path
    # CSV file that is read as the test data.
    test_data_path: Path

In [5]:
from Mobile_Price_Predictor.constants import *
from Mobile_Price_Predictor.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """
        Read both YAML files and create the artifacts root directory.

        Parameters:
        config_filepath: Location of the main configuration YAML.
        params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """
        Build the settings object for the data transformation stage.

        The values come from the ``data_transformation`` section of the loaded
        configuration; the stage's ``root_dir`` is passed to
        ``create_directories`` before the object is returned.

        Returns:
        DataTransformationConfig: Settings for the data transformation stage.
        """
        stage = self.config.data_transformation
        create_directories([stage.root_dir])

        return DataTransformationConfig(
            root_dir=stage.root_dir,
            train_data_path=stage.train_data_path,
            test_data_path=stage.test_data_path,
        )

In [9]:
import os
from Mobile_Price_Predictor.utils.common import read_yaml, create_directories
from Mobile_Price_Predictor.logging import logger
import pandas as pd
import numpy as np 

In [12]:
class DataTransformation:
    """Applies minimum-value floors to selected columns and writes the results as CSV."""

    # The annotation is quoted so that defining this class does not require
    # DataTransformationConfig to already exist in the namespace.
    def __init__(self, config: "DataTransformationConfig"):
        self.config = config

    def transform_data(self, df, m_dep_threshold=0.5, px_height_threshold=65, sc_w_threshold=2.54):
        """
        Raise every value below a per-column floor up to that floor.

        The columns 'm_dep', 'px_height' and 'sc_w' are processed; all other
        columns are returned unchanged. Missing values (NaN) are left as NaN.

        Parameters:
        df (DataFrame): The data to transform. It is NOT modified; a copy is
            transformed and returned.
        m_dep_threshold (float): The minimum allowed value for 'm_dep'.
        px_height_threshold (int): The minimum allowed value for 'px_height'.
        sc_w_threshold (float): The minimum allowed value for 'sc_w'.

        Returns:
        DataFrame: A new DataFrame with the floors applied.

        Raises:
        TypeError: If `df` is not a pandas DataFrame.
        KeyError: If one of the three columns is missing from `df`.
        """
        if not isinstance(df, pd.DataFrame):
            raise TypeError("Expected input to be a pandas DataFrame")

        floors = {
            "m_dep": m_dep_threshold,
            "px_height": px_height_threshold,
            "sc_w": sc_w_threshold,
        }

        # Work on a copy so the caller's frame is never altered as a side effect.
        transformed = df.copy()
        for column, floor in floors.items():
            # clip() returns a new Series and upcasts where needed (e.g. a
            # float floor on an integer column). Writing the float through
            # `.loc[mask, col] = value` into an integer column is a
            # deprecated incompatible-dtype assignment in recent pandas.
            transformed[column] = transformed[column].clip(lower=floor)

        return transformed

    def convert(self, write_index=False):
        """
        Read the train/test CSVs, transform them and write the results.

        Both files named in the config are read, passed through
        `transform_data`, and saved as 'train_data.csv' and 'test_data.csv'
        inside `config.root_dir`. The 'id' column is removed from the test
        data only.

        Parameters:
        write_index (bool): Whether to also write the DataFrame row index as
            a leading unnamed column. Defaults to False so that re-reading
            the files does not produce a spurious 'Unnamed: 0' column; pass
            True to reproduce the files written by the earlier behaviour,
            which always included the index.

        Raises:
        KeyError: If the test data has no 'id' column, or if a column needed
            by `transform_data` is missing.
        """
        # Read the data
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)
        logger.info("Data read successfully")

        # Transform the data
        train_data = self.transform_data(train_data)
        test_data = self.transform_data(test_data).drop(columns=["id"])
        logger.info("Data transformed successfully")

        # Write the results
        train_data.to_csv(os.path.join(self.config.root_dir, "train_data.csv"), index=write_index)
        test_data.to_csv(os.path.join(self.config.root_dir, "test_data.csv"), index=write_index)
        logger.info("Train and Test data made successfully")


In [13]:
# Run the data transformation stage end to end.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.convert()
except Exception:
    # Record the failure (with its traceback) through the project logger,
    # then re-raise the active exception as-is with a bare `raise`.
    logger.exception("Data transformation stage failed")
    raise

[2024-05-20 23:22:40,840: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-20 23:22:40,841: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-20 23:22:40,842: INFO: common: created directory at: artifacts]
[2024-05-20 23:22:40,842: INFO: common: created directory at: artifacts/data_transformation]
[2024-05-20 23:22:40,850: INFO: 4096429100: Data read successfully]
[2024-05-20 23:22:40,854: INFO: 4096429100: Data Transformed successfully]
[2024-05-20 23:22:40,871: INFO: 4096429100: Train and Test data made successfully]
