# **FEATURE ENGINEERING**

In [5]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
import os
import sys

# The notebook runs one directory below the repository root; put that parent
# directory at the front of sys.path so the `src` package can be imported.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Only insert once, so re-running the cell does not stack duplicate entries.
if not (project_root in sys.path):
    sys.path.insert(0, project_root)

In [7]:
from src.utils.load_by_prefix import load_latest_file_by_prefix
from src.utils.spark_session import get_spark_session  
from src.loaders.config_loader import get_config

# Shared handles for this notebook: the Spark session and the project configuration.
spark = get_spark_session()
config = get_config()

# Directory configured under data.processed_dir; the weekly sales files are read from here.
path = config["data"]["processed_dir"]

# Load the latest file matching each prefix: category-level first, then city-level.
df_category_weekly_sales = load_latest_file_by_prefix(
    prefix="category_sales_weekly",
    directory=path,
)
df_city_weekly_sales = load_latest_file_by_prefix(
    prefix="city_sales_weekly",
    directory=path,
)

[INFO] Loading latest file: category_sales_weekly_v1.csv
[INFO] DataFrame loaded successfully. Shape: (2090, 3)
[INFO] Loading latest file: city_sales_weekly_v1.csv
[INFO] DataFrame loaded successfully. Shape: (2508, 3)


In [8]:
# Show the column names of the category-level frame; the feature-engineering
# call needs the exact group and target column names.
df_category_weekly_sales.columns

Index(['Product Category', 'Week Start', 'Category Weekly Sales'], dtype='object')

In [9]:
from src.feature_engineering.feature_engineering_base import FeatureEngineering

# Derive additional features from the category-level weekly sales frame.
# The lags / windows arguments presumably request lagged and rolling-window
# features of the target per product category -- confirm against
# FeatureEngineering.run_all.
# NOTE(review): the result is bound back to df_category_weekly_sales, so
# re-running this cell passes the already-transformed frame in again; re-run
# the data-loading cell first if this cell needs to be executed a second time.
fe = FeatureEngineering(df=df_category_weekly_sales)

df_category_weekly_sales = fe.run_all(
    group_col="Product Category",
    target_col="Category Weekly Sales",
    lags=[1, 2, 4],
    windows=[2, 4],
)

[INFO] Detected Pandas DataFrame


In [10]:
from src.exporters.feature_df_expoter import FeatureExporter

# Export the weekly frames as versioned feature files.
# Both frames are passed as plain Pandas DataFrames; main_df is not supplied (None).
# NOTE(review): df_city_weekly_sales has not been passed through FeatureEngineering
# in this notebook, yet it is saved as "city_sales_weekly_features" -- confirm
# that exporting it without engineered features is intended.
exporter = FeatureExporter(
    main_df=None,
    city_df=df_city_weekly_sales,  # already Pandas
    category_df=df_category_weekly_sales,  # already Pandas
)

# The call returns the exporter object; the trailing semicolon keeps its repr
# (e.g. "<...FeatureExporter at 0x...>") out of the cell output.
exporter.export_and_version_features();

[INFO] Using existing directory: ../data/processed
[SAVED] city_sales_weekly_features => ../data/processed\city_sales_weekly_features_v1.csv
[SAVED] category_sales_weekly_features => ../data/processed\category_sales_weekly_features_v1.csv

 All feature-engineered datasets saved with versioning.
Check the outputs in: ../data/processed


<src.exporters.feature_df_expoter.FeatureExporter at 0x1ad1c8a7bf0>