In [1]:
import os
import sys

In [2]:
os.chdir('../')

In [3]:
sys.path.append(os.path.join(os.getcwd(), "src"))

In [4]:
from dataclasses import dataclass
from pathlib import Path
from WattPredictor.utils.helpers import *
from WattPredictor.utils.exception import *
from WattPredictor.constants import *
from WattPredictor import logger

In [5]:
@dataclass
class DataTransformationConfig:
    root_dir: Path
    data_file: Path
    status_file: str
    label_encoder: Path

@dataclass(frozen=True)
class FeatureStoreConfig:
    hopsworks_project_name: str
    hopsworks_api_key: str

In [6]:
class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_PATH,
                       params_filepath=PARAMS_PATH,
                       schema_filepath=SCHEMA_PATH):
        
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        config = self.config.data_transformation
        params = self.params.training

        create_directories([config.root_dir])

        data_transformation_config = DataTransformationConfig(
            root_dir=Path(config.root_dir),
            data_file=Path(config.data_file),
            status_file=Path(config.status_file),
            label_encoder=Path(config.label_encoder)
        )

        return data_transformation_config

    def get_feature_store_config(self) -> FeatureStoreConfig:

        config = self.config.feature_store

        feature_store_config = FeatureStoreConfig(
                hopsworks_project_name=config.hopsworks_project_name,
                hopsworks_api_key=os.environ['hopsworks_api_key'],
        )

        return feature_store_config

In [7]:
import hopsworks
import pandas as pd
import sys
import os
from WattPredictor.config.feature_config import FeatureStoreConfig
from WattPredictor.utils.exception import CustomException
from WattPredictor import logger

class FeatureStore:
    def __init__(self, config:FeatureStoreConfig):
        try:
            self.config = config
            self.connect()
        except Exception as e:
            raise CustomException(e, sys)


    def connect(self):
        try:
            self.project = hopsworks.login(
                project=self.config.hopsworks_project_name,
                api_key_value=self.config.hopsworks_api_key
            )
            self.feature_store = self.project.get_feature_store()
            self.dataset_api = self.project.get_dataset_api()
            logger.info(f"Connected to Hopsworks Feature Store: {self.config.hopsworks_project_name}")
        except Exception as e:
            raise CustomException(e, sys)


    def create_feature_group(self, name, df, primary_key, event_time, description, online_enabled=True, version=1):
        try:
            try:
                fg = self.feature_store.get_feature_group(name=name, version=version)
                logger.info(f"Feature Group '{name}' v{version} exists. Deleting it.")
                fg.delete()
            except Exception:
                logger.info(f"Feature Group '{name}' v{version} does not exist. Will create a new one.")

            # Create a new feature group
            logger.info(f"Creating Feature Group '{name}' v{version}.")
            fg = self.feature_store.get_or_create_feature_group(
                name=name,
                version=version,
                primary_key=primary_key,
                event_time=event_time,
                description=description,
                online_enabled=online_enabled
            )

            fg.save(df)
            logger.info(f"Feature Group '{name}' v{version} created and data inserted.")

        except Exception as e:
            raise CustomException(e, sys)



    def create_feature_view(self, name: str, feature_group_name: str, features: list):
        try:
            fg = self.feature_store.get_feature_group(name=feature_group_name, version=1)
            fv = self.feature_store.get_or_create_feature_view(
                name=name,
                version=1,
                query=fg.select(features),
                description=f"Feature View for {name}"
            )
            logger.info(f"Feature View '{name}' created successfully")
        except Exception as e:
            raise CustomException(e, sys)
        

    def save_training_dataset(self, feature_view_name, version_description, output_format="csv"):
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=1)
            td = fv.create_training_data(
                description=version_description,
                data_format=output_format,
                write_options={"wait_for_job": True}
            )
            logger.info(f"Training dataset created for Feature View '{feature_view_name}'.")
            return td
        except Exception as e:
            raise CustomException(e, sys)
        
    def load_latest_training_dataset(self, feature_view_name):
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=1)
            return fv.training_data()
        except Exception as e:
            raise CustomException(e, sys)


    def upload_file_safely(self, local_path: str, target_name: str):

        try:
            self.dataset_api.upload(
                local_path,
                f"Resources/wattpredictor_artifacts/{target_name}",
                overwrite=True 
            )
            logger.info(f"Uploaded file to Feature Store: {target_name}")
        except Exception as e:
            raise CustomException(e, sys)


    def get_training_data(self, feature_view_name: str):
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=1)
            X, y = fv.training_data()
            logger.info(f"Retrieved training data from Feature View '{feature_view_name}'")
            return X, y
        except Exception as e:
            raise CustomException(e, sys)
    
    
    def get_online_features(self, feature_view_name, key_dict: dict, version=1):
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=version)
            if fv is None:
                logger.error(f"[Online Fetch] Feature View '{feature_view_name}' v{version} not found.")
                raise CustomException(f"Feature View '{feature_view_name}' v{version} is None", sys)

            expected_primary_keys = ["date_str", "sub_region_code"]
            
            key_values = [key_dict[key] for key in expected_primary_keys]
            
            try:
                result = fv.get_feature_vector(key_dict)
                logger.info(f"[Online Fetch] Fetched online features using get_feature_vector for {key_dict}: {result}")
                return result
            except Exception as vector_error:
                logger.warning(f"get_feature_vector failed: {vector_error}, trying get_serving_vector")
                
                result = fv.get_serving_vector(key_values).to_dict()
                logger.info(f"[Online Fetch] Fetched online features using get_serving_vector for {key_dict}: {result}")
                return result

        except Exception as e:
            logger.error(f"[Online Fetch] Failed to fetch online features for {feature_view_name} with key {key_dict}")
            raise CustomException(e, sys)

In [8]:
import os
import sys
import json
import joblib
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
from WattPredictor.utils.helpers import create_directories, save_bin
from WattPredictor.utils.exception import CustomException
from WattPredictor import logger


class DataTransformation:
    def __init__(self, config: DataTransformationConfig, feature_store_config: FeatureStoreConfig):
        self.config = config
        self.feature_store = FeatureStore(feature_store_config)

    def check_status(self):
        try:
            with open(self.config.status_file, 'r') as f:
                status_data = json.load(f)
            return status_data.get("validation_status", False)
        except Exception as e:
            logger.warning(f"Validation status check failed: {e}")
            return False

    def basic_preprocessing(self) -> pd.DataFrame:
        try:
            fg = self.feature_store.feature_store.get_feature_group(name="elec_wx_demand", version=1)
            df = fg.read()
            le = LabelEncoder()
            df['sub_region_code'] = le.fit_transform(df['subba'])
            df.rename(columns={'subba': 'sub_region', 'value': 'demand'}, inplace=True)
            df = df[['date_str','date', 'sub_region_code', 'demand', 'temperature_2m']]

            create_directories([os.path.dirname(self.config.label_encoder)])
            save_bin(le, self.config.label_encoder)
            self.feature_store.upload_file_safely(self.config.label_encoder, "label_encoder.pkl")

            logger.info("Label encoding and preprocessing complete.")
            return df
        except Exception as e:
            raise CustomException(e, sys)

    def feature_engineering(self, df: pd.DataFrame) -> pd.DataFrame:
        try:
            df['date'] = pd.to_datetime(df['date'], utc=True)
            
            df['hour'] = df['date'].dt.hour
            df['day_of_week'] = df['date'].dt.dayofweek
            df['month'] = df['date'].dt.month
            df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)

            holidays = calendar().holidays(start=df['date'].min(), end=df['date'].max())
            df['is_holiday'] = df['date'].isin(holidays).astype(int)

            
            self.feature_store.create_feature_group(
                name="elec_wx_features",
                df=df,
                primary_key=["date_str","sub_region_code"],
                event_time="date",
                description="Engineered electricity demand features",
                online_enabled=True
            )

            logger.info("Feature group created and feature engineering complete.")
            return df
        except Exception as e:
            raise CustomException(e, sys)

    def transform(self):
        if not self.check_status():
            raise CustomException("Validation failed. Aborting transformation.", sys)
        try:
            df = self.feature_engineering(self.basic_preprocessing())
            df.sort_values("date", inplace=True)

            self.feature_store.create_feature_view(
                name="elec_wx_features_view",
                feature_group_name="elec_wx_features",
                features=[
                    "date", "sub_region_code", "demand", "temperature_2m",
                    "hour", "day_of_week", "month", "is_weekend", "is_holiday"
                ]
            )

            self.feature_store.save_training_dataset(
                feature_view_name="elec_wx_features_view",
                version_description="initial training dataset with all features",
                output_format="csv"
            )

            logger.info("Feature view + training dataset saved successfully.")
            return df
        except Exception as e:
            raise CustomException(e, sys)

In [9]:
try:
        config = ConfigurationManager()
        data_transformation_config = config.get_data_transformation_config()
        feature_store_config = config.get_feature_store_config()
        data_transformation = DataTransformation(config=data_transformation_config,feature_store_config=feature_store_config)
        df= data_transformation.transform()

except Exception as e:
        raise CustomException(str(e), sys)

[2025-07-15 14:55:48,106: INFO: helpers: yaml file: config_file\config.yaml loaded successfully]
[2025-07-15 14:55:48,124: INFO: helpers: yaml file: config_file\params.yaml loaded successfully]
[2025-07-15 14:55:48,124: INFO: helpers: yaml file: config_file\schema.yaml loaded successfully]
[2025-07-15 14:55:48,124: INFO: helpers: created directory at: artifacts]
[2025-07-15 14:55:48,124: INFO: helpers: created directory at: artifacts/data_transformation]
[2025-07-15 14:55:48,132: INFO: external: Initializing external client]
[2025-07-15 14:55:48,132: INFO: external: Base URL: https://c.app.hopsworks.ai:443]
[2025-07-15 14:55:50,923: INFO: python: Python Engine initialized.]

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1240214
[2025-07-15 14:55:53,557: INFO: 3903490710: Connected to Hopsworks Feature Store: WattPredictor]
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (3.91s) 
[2025-07-15 14:56:00,842: INFO: helpers: created direc

Uploading f:\WattPredictor\artifacts\data_transformation\label_encoder.pkl: 0.000%|          | 0/549 elapsed<0…

[2025-07-15 14:56:03,477: INFO: 3903490710: Uploaded file to Feature Store: label_encoder.pkl]
[2025-07-15 14:56:03,479: INFO: 4102113803: Label encoding and preprocessing complete.]
[2025-07-15 14:56:03,991: INFO: 3903490710: Feature Group 'elec_wx_features' v1 exists. Deleting it.]
]
[2025-07-15 14:56:05,208: INFO: 3903490710: Creating Feature Group 'elec_wx_features' v1.]
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1240214/fs/1223749/fg/1495468


Uploading Dataframe: 100.00% |██████████| Rows 39545/39545 | Elapsed Time: 00:04 | Remaining Time: 00:00


Launching job: elec_wx_features_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1240214/jobs/named/elec_wx_features_1_offline_fg_materialization/executions
[2025-07-15 14:56:27,682: INFO: 3903490710: Feature Group 'elec_wx_features' v1 created and data inserted.]
[2025-07-15 14:56:27,683: INFO: 4102113803: Feature group created and feature engineering complete.]


CustomException: Exception in C:\Users\Javith Naseem\AppData\Local\Temp\ipykernel_9664\1019222407.py, line 6: Exception in C:\Users\Javith Naseem\AppData\Local\Temp\ipykernel_9664\4102113803.py, line 82: Exception in C:\Users\Javith Naseem\AppData\Local\Temp\ipykernel_9664\3903490710.py, line 62: Cannot get back the feature view because the query defined is no longer valid. Some feature groups used in the query may have been deleted. You can clean up this feature view on the UI or `FeatureView.clean`.