In [1]:
import os
import sys

In [2]:
# Move the process working directory one level up, so relative paths used by
# later cells resolve from the parent of the current directory.
# NOTE(review): not idempotent — re-running this cell climbs one more level each time.
os.chdir('../')

In [3]:
# Make the packages under "<cwd>/src" importable for the cells that follow.
src_dir = os.path.join(os.getcwd(), "src")
sys.path.append(src_dir)

In [4]:
import os
import sys
import json
import joblib
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
from WattPredictor.entity.config_entity import EngineeringConfig
from WattPredictor.config.data_config import DataConfigurationManager
from WattPredictor.utils.feature import feature_store_instance
from WattPredictor.utils.helpers import create_directories, save_bin
from WattPredictor.utils.exception import CustomException
from WattPredictor.utils.logging import logger

class Engineering:
    """Derive calendar features for the demand data and publish them.

    The pipeline reads the ``elec_wx_demands`` feature group, label-encodes the
    ``subba`` column, adds hour/day/month/weekend/holiday columns, and then
    registers a feature group, a feature view and a training dataset through
    the ``feature_store`` helper obtained from ``feature_store_instance()``.
    """

    def __init__(self, config: EngineeringConfig):
        """Store the configuration and obtain the feature-store helper."""
        self.config = config
        self.feature_store = feature_store_instance()

    def check_status(self):
        """Return the ``validation_status`` entry of the status file.

        Best-effort by design: any failure to open or parse
        ``config.status_file`` is logged as a warning and reported as
        ``False``; a missing key also yields ``False``.
        """
        try:
            with open(self.config.status_file, 'r') as f:
                status_data = json.load(f)
            return status_data.get("validation_status", False)
        except Exception as e:
            logger.warning(f"Validation status check failed: {e}")
            return False

    def basic_preprocessing(self) -> pd.DataFrame:
        """Load the raw feature group, encode the sub-region and trim columns.

        Side effects: the fitted ``LabelEncoder`` is saved to
        ``config.label_encoder`` and uploaded as ``label_encoder.pkl``.

        Returns:
            An independent DataFrame with the columns ``date_str``, ``date``,
            ``sub_region_code``, ``demand`` and ``temperature_2m``.

        Raises:
            CustomException: if any step fails (original error is chained).
        """
        try:
            fg = self.feature_store.feature_store.get_feature_group(name="elec_wx_demands", version=2)
            df = fg.read()

            le = LabelEncoder()
            df['sub_region_code'] = le.fit_transform(df['subba'])
            df = df.rename(columns={'subba': 'sub_region', 'value': 'demand'})
            # .copy() detaches the subset from the frame returned by fg.read(),
            # so the column assignments done later in feature_engineering()
            # never write into a view of it (no SettingWithCopyWarning).
            df = df[['date_str', 'date', 'sub_region_code', 'demand', 'temperature_2m']].copy()

            create_directories([os.path.dirname(self.config.label_encoder)])
            save_bin(le, self.config.label_encoder)
            self.feature_store.upload_file_safely(self.config.label_encoder, "label_encoder.pkl")

            logger.info(f"Label encoding and preprocessing complete. Columns: {list(df.columns)}")
            return df
        except Exception as e:
            raise CustomException(f"Failed to preprocess data: {e}", sys) from e

    def feature_engineering(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add calendar features to ``df`` and create ``elec_wx_features``.

        ``df`` is modified in place (and also returned). ``date`` is converted
        to timezone-aware UTC timestamps, from which ``hour``, ``day_of_week``,
        ``month``, ``is_weekend`` and ``is_holiday`` are derived.

        Raises:
            CustomException: if any step fails (original error is chained).
        """
        try:
            df['date'] = pd.to_datetime(df['date'], utc=True)
            df['hour'] = df['date'].dt.hour.astype('int64')
            df['day_of_week'] = df['date'].dt.dayofweek.astype('int64')
            df['month'] = df['date'].dt.month.astype('int64')
            df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype('int64')

            # Holidays are calendar *days*: the calendar yields midnight
            # timestamps, so comparing raw hourly timestamps would flag only
            # the 00:00 row of each holiday. Match on the normalized day.
            # NOTE(review): the day is taken in UTC (the timezone `date` was
            # converted to above), not in the regions' local time — confirm.
            calendar_days = df['date'].dt.tz_localize(None).dt.normalize()
            holidays = calendar().holidays(start=calendar_days.min(), end=calendar_days.max())
            df['is_holiday'] = calendar_days.isin(holidays).astype('int64')

            df['temperature_2m'] = df['temperature_2m'].astype('float64')
            df['demand'] = df['demand'].astype('float64')

            self.feature_store.create_feature_group(
                name="elec_wx_features",
                df=df,
                primary_key=["date_str", "sub_region_code"],
                event_time="date",
                description="Engineered electricity demand features",
                online_enabled=True
            )

            logger.info(f"Feature group 'elec_wx_features' created successfully. Columns: {list(df.columns)}")
            return df
        except Exception as e:
            raise CustomException(f"Failed to perform feature engineering: {e}", sys) from e

    def transform(self):
        """Run the full pipeline and return the engineered frame sorted by date.

        Raises:
            CustomException: if ``check_status()`` is falsy, or if any
                pipeline step fails (original error is chained).
        """
        if not self.check_status():
            raise CustomException("Validation failed. Aborting transformation.", sys)
        try:
            df = self.feature_engineering(self.basic_preprocessing())
            df = df.sort_values("date")

            self.feature_store.create_feature_view(
                name="elec_wx_features_view",
                feature_group_name="elec_wx_features",
                features=[
                    "date", "sub_region_code", "demand", "temperature_2m",
                    "hour", "day_of_week", "month", "is_weekend", "is_holiday"
                ]
            )

            self.feature_store.save_training_dataset(
                feature_view_name="elec_wx_features_view",
                version_description="Training dataset with essential features for electricity demand prediction",
                output_format="csv"
            )

            logger.info("Feature view 'elec_wx_features_view' and training dataset saved successfully.")
            return df
        except Exception as e:
            raise CustomException(f"Failed to transform data: {e}", sys) from e

In [5]:
# Notebook driver: build the transformation config, run the Engineering
# pipeline, and keep the resulting frame in `df`.
try:
    config = DataConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = Engineering(config=data_transformation_config)
    df = data_transformation.transform()
except Exception as e:
    # Surface every failure as the project's CustomException.
    raise CustomException(str(e), sys)

[2025-07-20 18:41:12,099: INFO: helpers: yaml file: config_file\config.yaml loaded successfully]
[2025-07-20 18:41:12,108: INFO: helpers: yaml file: config_file\params.yaml loaded successfully]
[2025-07-20 18:41:12,113: INFO: helpers: yaml file: config_file\schema.yaml loaded successfully]
[2025-07-20 18:41:12,116: INFO: helpers: created directory at: artifacts]
[2025-07-20 18:41:12,119: INFO: helpers: created directory at: artifacts/engineering]
[2025-07-20 18:41:12,131: INFO: helpers: yaml file: config_file\config.yaml loaded successfully]
[2025-07-20 18:41:12,134: INFO: external: Initializing external client]
[2025-07-20 18:41:12,136: INFO: external: Base URL: https://c.app.hopsworks.ai:443]
[2025-07-20 18:41:15,469: INFO: python: Python Engine initialized.]

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1240214
[2025-07-20 18:41:18,408: INFO: feature_store: Connected to Hopsworks Feature Store: WattPredictor]
Finished: Reading data from Hopsworks, using Hop

Uploading f:\WattPredictor\artifacts\engineering\label_encoder.pkl: 0.000%|          | 0/549 elapsed<00:00 rem…

[2025-07-20 18:41:38,463: INFO: feature_store: Uploaded file to Feature Store: label_encoder.pkl]
[2025-07-20 18:41:38,465: INFO: 614156716: Label encoding and preprocessing complete. Columns: ['date_str', 'date', 'sub_region_code', 'demand', 'temperature_2m']]
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1240214/fs/1223749/fg/1494587


Uploading Dataframe: 100.00% |██████████| Rows 95799/95799 | Elapsed Time: 00:27 | Remaining Time: 00:00


Launching job: elec_wx_features_2_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1240214/jobs/named/elec_wx_features_2_offline_fg_materialization/executions
[2025-07-20 18:42:22,058: INFO: feature_store: Feature Group 'elec_wx_features' v2 created successfully.]
[2025-07-20 18:42:22,060: INFO: 614156716: Feature group 'elec_wx_features' created successfully. Columns: ['date_str', 'date', 'sub_region_code', 'demand', 'temperature_2m', 'hour', 'day_of_week', 'month', 'is_weekend', 'is_holiday']]
Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1240214/fs/1223749/fv/elec_wx_features_view/version/1
[2025-07-20 18:42:24,873: INFO: feature_store: Feature View 'elec_wx_features_view' v1 created successfully.]
Training dataset job started successfully, you can follow the progress at 
http://c.app.hopsworks.ai/p/1240214/jobs/named/elec_wx_features_view_1_create_fv_td_20072025131319/execution