In [1]:
import os
os.chdir("../")

In [2]:

from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTransformationRecommendConfig:
    root_dir: Path
    dataset_path: Path
    processed_dataset_path: Path
    recommend_dataset_path: Path
    tracked_recommend_dataset_path: Path

In [3]:
from anidex.constants import *
from anidex.utils.common import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_data_transformation_recommend_config(self) -> DataTransformationRecommendConfig:
        config = self.config.recommed_data_preprocessing

        create_directories([config.root_dir])

        data_transformation_config = DataTransformationRecommendConfig(
            root_dir=config.root_dir,
            dataset_path=config.dataset_path,
            processed_dataset_path=config.processed_dataset_path,
            recommend_dataset_path=config.recommend_dataset_path,
            tracked_recommend_dataset_path=config.tracked_recommend_dataset_path
        )

        return data_transformation_config

In [5]:
import sys
from dataclasses import dataclass

import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline


from anidex.utils.exception import CustomException
from anidex import logging
from anidex.utils.common import DropNaTransformer, DateTransformTransformer, FillnaTransformer, ReplaceValueTransformer, save_object
import os


In [6]:

class DataTransformationRecommend:
    def __init__(self,config : DataTransformationRecommendConfig):
        self.config = config


    def get_data_transformer_object(self, data):
        '''
        This is data transformation function
        '''
        try:

            preprocessor = Pipeline(
                steps=[('replace', ReplaceValueTransformer(9, np.nan)),
                       ('replace2', ReplaceValueTransformer("9", np.nan)),
                       ('dropna', DropNaTransformer(
                           subset=["exactPrice", "RentOrSale", "URLs"])),
                       ('date_transform', DateTransformTransformer(
                        date_column='postedOn')),
                       ('fill_na', FillnaTransformer(
                        columns=data.columns, value="Missing")),

                       ]
            )

            return preprocessor

        except Exception as e:
            raise CustomException(e, sys)

    def initiate_data_transformation_recommend(self):
        try:
            Data_path = self.config.dataset_path
            Dataset = pd.read_csv(Data_path)

            logging.info("Reading preprocessor object")

            preprocessing_obj = self.get_data_transformer_object(Dataset)

            data = preprocessing_obj.fit_transform(
                Dataset)

            logging.info(f"Saved preprocessed object. {data.head()}")
            logging.info(f"saving processor : {preprocessing_obj}")

            # Saving the file just to see if processed data is valid for model training
            dataset = pd.DataFrame(data)

            # save the dataframe as a csv file
            dataset.to_csv(self.config.processed_dataset_path, index=False)

            combined_fea = dataset["propertyType"] + "   " + dataset["locality"] + "   " + dataset["furnishing"] + "   " + dataset["city"] + \
                "   " + dataset["bedrooms"].astype("str") + "   " + dataset["bathrooms"].astype(
                    "str") + "   " + dataset["RentOrSale"]

            combined_fea_df = pd.DataFrame({"text": combined_fea, "propertyType": dataset["propertyType"], "locality": dataset[
                                           "locality"], "furnishing": dataset["furnishing"], "city": dataset["city"], "RentOrSale": dataset["RentOrSale"], "BHK": dataset["bedrooms"], "URLs": dataset["URLs"]})

            combined_fea_df.to_csv(self.config.recommend_dataset_path, index=False)
            combined_fea_df.to_csv(self.config.tracked_recommend_dataset_path, index=False)

            logging.info(
                f"Saved preprocessed data for recommendation {combined_fea_df.head()}")

            return combined_fea_df

        except Exception as e:
            raise CustomException(e, sys)


In [7]:
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_recommend_config()
    data_transformation = DataTransformationRecommend(config=data_transformation_config)
    data_transformation.initiate_data_transformation_recommend()
except Exception as e:
    raise e


[2024-02-08 11:56:26,504: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-02-08 11:56:26,512: INFO: common: yaml file: params.yaml loaded successfully]
[2024-02-08 11:56:26,516: INFO: common: created directory at: artifacts]
[2024-02-08 11:56:26,518: INFO: common: created directory at: artifacts/recommed]
[2024-02-08 11:56:26,829: INFO: 558128597: Reading preprocessor object]
[2024-02-08 11:56:28,196: INFO: 558128597: Saved preprocessed object.    exactPrice sqftPrice securityDeposit           propertyType  postedOn  \
0    240000.0     171.0         Missing  Multistorey Apartment       233   
1     12000.0      12.0         12000.0  Multistorey Apartment       234   
2     17000.0       7.0         Missing      Residential House       232   
3      5000.0   Missing         Missing      Residential House       230   
4     12000.0   Missing         24000.0  Multistorey Apartment       229   

  noOfLifts maintenanceChargesFrequency maintenanceCharges  \
0   Missi