In [2]:
import os

In [3]:
%pwd

'c:\\Users\\shash\\fetch_assessment\\research'

In [4]:
# Step up from research/ to the project root so that relative paths
# (config file, artifacts directory) resolve from there.
# Guarded on the folder name so re-running this cell does not climb another level.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [5]:
%pwd

'c:\\Users\\shash\\fetch_assessment'

In [24]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class FeatureengineeringConfig:
    """Immutable (frozen) settings for the feature-engineering stage."""
    # Directory the stage writes its output into (joined with "data.csv" by the consumer).
    # NOTE(review): annotated as Path, but ConfigurationManager passes the value
    # straight through from the loaded YAML config — confirm the actual runtime type.
    root_dir: Path
    # Location of the input CSV that the stage reads with pandas.
    data_path: Path


In [25]:
from fetch_assessment.constants import *
from fetch_assessment.utils.common import read_yaml, create_directories

In [26]:
class ConfigurationManager:
    """Loads the project config and params files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: path of the main config YAML.
            params_filepath: path of the params YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The loaded config is accessed by attribute; `artifacts_root` is the
        # top-level output directory for the pipeline.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> FeatureengineeringConfig:
        """Build the feature-engineering stage config from the `feature_engineering` section.

        Also creates the stage's root directory as a side effect.

        Returns:
            FeatureengineeringConfig carrying `root_dir` and `data_path` exactly
            as they appear in the loaded config.
        """
        section = self.config.feature_engineering
        stage_root = section.root_dir

        create_directories([stage_root])

        return FeatureengineeringConfig(
            root_dir=stage_root,
            data_path=section.data_path,
        )

In [27]:
import pandas as pd

In [30]:
class Feature_engineering:
    """Adds calendar and lag features to the receipts table and saves the result.

    The input CSV is read from ``config.data_path`` and the transformed table
    is written to ``data.csv`` inside ``config.root_dir``.
    """

    def __init__(self, config: "FeatureengineeringConfig"):
        """Keep the stage configuration (must expose ``data_path`` and ``root_dir``)."""
        self.config = config

    def add_feature(self, data, lags=(1, 2, 3)):
        """Return a new DataFrame with calendar and lagged-count columns added.

        The caller's frame is left untouched, so this can be re-run safely on
        the same input.

        Args:
            data: DataFrame with a ``'# Date'`` column accepted by
                ``pd.to_datetime`` and a ``'Receipt_Count'`` column.
            lags: positive row offsets; each ``k`` adds a ``Lag_k`` column with
                ``Receipt_Count`` shifted down by ``k`` rows. Defaults to the
                previous 1, 2 and 3 rows.

        Returns:
            A copy of ``data`` with ``Day_of_Week``, ``Month``, ``Day``, ``Year``
            and the ``Lag_k`` columns, with every row that contains a missing
            value removed (this includes the leading rows without lag history).

        Raises:
            ValueError: if any lag is smaller than 1.
            KeyError: if ``'# Date'`` or ``'Receipt_Count'`` is missing.
        """
        lags = tuple(lags)
        if any(lag < 1 for lag in lags):
            # A zero lag duplicates the target and a negative lag pulls in future values.
            raise ValueError("lags must be positive row offsets")

        # Work on a copy: assigning columns / dropping rows on `data` itself
        # would silently change the caller's DataFrame.
        frame = data.copy()

        # Calendar features
        frame['# Date'] = pd.to_datetime(frame['# Date'])
        dates = frame['# Date'].dt
        frame['Day_of_Week'] = dates.dayofweek  # Monday=0, Sunday=6
        frame['Month'] = dates.month
        frame['Day'] = dates.day
        frame['Year'] = dates.year

        # Basic lag features; more sophisticated approaches can be used for
        # time series forecasting.
        # NOTE(review): shift() is row-based, so these equal "previous days" only
        # if the rows are in chronological order with one row per day. The input
        # is not sorted here — confirm the CSV ordering.
        receipts = frame['Receipt_Count']
        for lag in lags:
            frame[f'Lag_{lag}'] = receipts.shift(lag)

        # Drops rows with a missing value in ANY column, not only the lag columns.
        return frame.dropna()

    def features(self):
        """Read the input CSV, add the features, and write ``data.csv`` to ``root_dir``."""
        data = pd.read_csv(self.config.data_path)
        data_transformed = self.add_feature(data)
        output_path = os.path.join(self.config.root_dir, "data.csv")
        # NOTE(review): the DataFrame index is written as an unnamed first column
        # (pandas default). Kept as-is because downstream readers may expect it;
        # consider index=False once consumers are checked.
        data_transformed.to_csv(output_path)


In [31]:
# Run the feature-engineering stage end to end: load the configuration, build
# the stage config, then read, transform and save the data.
# Exceptions are left to propagate so the notebook shows the full traceback.
config = ConfigurationManager()
Feature_engineering_config = config.get_data_transformation_config()
data_transformation = Feature_engineering(config=Feature_engineering_config)
data_transformation.features()

[2023-11-12 20:39:30,239: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-11-12 20:39:30,241: INFO: common: yaml file: params.yaml loaded successfully]
[2023-11-12 20:39:30,243: INFO: common: created directory at: artifacts]
[2023-11-12 20:39:30,244: INFO: common: created directory at: artifacts/data_transformation]
