In [1]:
import os
import sys

In [2]:
# Move up to the project root (the directory that contains `src`, which the
# next cell adds to sys.path). The guard makes the cell idempotent: without it
# every re-run of this cell climbs one more directory level.
if not os.path.isdir("src"):
    os.chdir('../')

In [3]:
# Make the project's `src` directory importable. The membership check keeps
# sys.path free of duplicate entries when this cell is executed more than once.
_src_dir = os.path.join(os.getcwd(), "src")
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

In [4]:
from dataclasses import dataclass, field
from pathlib import Path
from WattPredictor.utils.helpers import *
from WattPredictor.utils.exception import *
from WattPredictor.constants import *
from WattPredictor import logger

In [5]:
@dataclass
class DataTransformationConfig:
    """Settings consumed by the data-transformation stage.

    Attributes:
        root_dir: Directory that holds this stage's artifacts.
        data_file: Path of the input data file.
        status_file: Path of the JSON file whose ``validation_status`` flag
            gates the transformation.
        label_encoder: Path where the fitted label encoder is saved.
        input_seq_len: Input sequence length taken from the params file.
        step_size: Step size taken from the params file.
    """
    root_dir: Path
    data_file: Path
    # Annotated as Path (was str): the configuration manager builds this value
    # with Path(...), like every other path field in this class.
    status_file: Path
    label_encoder: Path
    input_seq_len: int
    step_size: int

@dataclass(frozen=True)
class FeatureStoreConfig:
    """Immutable Hopsworks connection settings.

    Attributes:
        hopsworks_project_name: Name of the Hopsworks project to log in to.
        hopsworks_api_key: API key used for the login (a secret).
    """
    hopsworks_project_name: str
    # repr=False keeps the secret out of repr()/str(), so displaying or logging
    # the config (for example as a notebook cell result) does not leak the key.
    hopsworks_api_key: str = field(repr=False)

In [6]:
class ConfigurationManager:
    """Loads the project's YAML files and builds typed per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_PATH,
                       params_filepath=PARAMS_PATH,
                       schema_filepath=SCHEMA_PATH):
        """Read the config, params and schema YAML files.

        Also creates the directory named by ``artifacts_root`` in the config.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the data-transformation settings and create the stage directory.

        Paths come from the ``data_transformation`` section of the config;
        ``input_seq_len`` and ``step_size`` are read from the ``model_trainer``
        section of the params file.

        Returns:
            A populated ``DataTransformationConfig``.
        """
        config = self.config.data_transformation
        params = self.params.model_trainer

        create_directories([config.root_dir])

        return DataTransformationConfig(
            root_dir=Path(config.root_dir),
            data_file=Path(config.data_file),
            status_file=Path(config.status_file),
            label_encoder=Path(config.label_encoder),
            input_seq_len=params.input_seq_len,
            step_size=params.step_size
        )

    def get_feature_store_config(self) -> FeatureStoreConfig:
        """Build the Hopsworks connection settings.

        The project name comes from the ``feature_store`` section of the
        config; the API key is read from the ``hopsworks_api_key`` environment
        variable so the secret never lives in a file.

        Returns:
            A populated ``FeatureStoreConfig``.

        Raises:
            KeyError: If the ``hopsworks_api_key`` environment variable is not set.
        """
        config = self.config.feature_store

        api_key = os.environ.get('hopsworks_api_key')
        if api_key is None:
            # Same exception type as a plain os.environ[...] lookup, but with a
            # message that tells the user what to do.
            raise KeyError(
                "Environment variable 'hopsworks_api_key' is not set; "
                "export it before building the feature store configuration."
            )

        return FeatureStoreConfig(
            hopsworks_project_name=config.hopsworks_project_name,
            hopsworks_api_key=api_key,
        )

In [None]:
import hopsworks
import pandas as pd
import sys
from WattPredictor.utils.exception import CustomException
from WattPredictor import logger


class FeatureStore:
    """Convenience wrapper around a Hopsworks project's feature store and dataset API.

    Every feature group and feature view is addressed at version 1. Errors
    raised inside the public methods are re-raised as ``CustomException``.
    """

    def __init__(self, config):
        """Keep ``config`` and log in to Hopsworks right away.

        Args:
            config: Object exposing ``hopsworks_project_name`` and
                ``hopsworks_api_key``.
        """
        self.config = config
        self.connect()

    def connect(self):
        """Log in to Hopsworks and cache the project, feature store and dataset API handles.

        Raises:
            CustomException: If the login or any handle lookup fails.
        """
        try:
            self.project = hopsworks.login(
                project=self.config.hopsworks_project_name,
                api_key_value=self.config.hopsworks_api_key
            )
            self.feature_store = self.project.get_feature_store()
            self.dataset_api = self.project.get_dataset_api()
            logger.info(f"Connected to Hopsworks Feature Store: {self.config.hopsworks_project_name}")
        except Exception as e:
            raise CustomException(e, sys)

    def _find_feature_group(self, name):
        """Return version 1 of feature group ``name``, or ``None`` when it cannot be fetched."""
        try:
            fg = self.feature_store.get_feature_group(name=name, version=1)
        except Exception as get_error:
            # Any lookup failure is treated as "does not exist yet" (best effort).
            logger.info(f"Feature Group '{name}' not found: {str(get_error)}")
            return None
        if fg is None:
            logger.info(f"Feature Group '{name}' returned None, treating as not found.")
        return fg

    def _create_feature_group(self, name, primary_key, event_time, description, online):
        """Create (or fetch-or-create) version 1 of feature group ``name`` and return it."""
        try:
            # Use get_or_create_feature_group if available, otherwise create_feature_group
            if hasattr(self.feature_store, 'get_or_create_feature_group'):
                fg = self.feature_store.get_or_create_feature_group(
                    name=name,
                    version=1,
                    primary_key=primary_key,
                    event_time=event_time,
                    description=description,
                    online_enabled=online
                )
                logger.info(f"Feature Group '{name}' created using get_or_create_feature_group")
            else:
                # Traditional create approach
                fg = self.feature_store.create_feature_group(
                    name=name,
                    version=1,
                    primary_key=primary_key,
                    event_time=event_time,
                    description=description,
                    online_enabled=online
                )
                logger.info(f"Feature Group '{name}' created using create_feature_group")
            return fg
        except Exception as create_error:
            logger.error(f"Failed to create feature group '{name}': {str(create_error)}")
            raise create_error

    def create_or_update_feature_group(self, name, df, primary_key, event_time, description, online=False):
        """Insert ``df`` into feature group ``name``, creating the group first if needed.

        The lookup and the insert are handled separately on purpose: a failed
        insert into an existing group is reported as an insert failure. It is
        not mistaken for "group not found", which would trigger a re-creation
        and a second insert attempt.

        Args:
            name: Feature group name (version 1 is always used).
            df: Data to insert; must support ``len()``.
            primary_key: Primary key column names for a newly created group.
            event_time: Event-time column name for a newly created group.
            description: Description for a newly created group.
            online: Whether a newly created group is online-enabled.

        Returns:
            The feature group the data was inserted into.

        Raises:
            CustomException: If creation or insertion fails.
        """
        try:
            # Step 1: insert straight away when the feature group already exists
            fg = self._find_feature_group(name)
            if fg is not None:
                logger.info(f"Feature Group '{name}' found. Will insert new data...")
                fg.insert(df, write_options={"wait_for_job": True})
                logger.info(f"Data successfully inserted into existing Feature Group '{name}'.")
                return fg

            # Step 2: create the feature group, then insert
            logger.info("Proceeding to create new feature group...")
            fg = self._create_feature_group(name, primary_key, event_time, description, online)
            if fg is None:
                raise Exception(f"Feature group '{name}' is None after creation")

            logger.info(f"Inserting {len(df)} rows into Feature Group '{name}'...")
            fg.insert(df, write_options={"wait_for_job": True})
            logger.info(f"Data successfully inserted into Feature Group '{name}'.")

            return fg

        except Exception as e:
            logger.error(f"Error in create_or_update_feature_group for '{name}': {str(e)}")
            raise CustomException(e, sys)

    def create_feature_view(self, name, feature_group_name, features):
        """Recreate feature view ``name`` (version 1) from selected features of a feature group.

        An existing view of the same name is deleted first on a best-effort
        basis; a lookup or delete failure is logged and creation is still attempted.

        Args:
            name: Feature view name.
            feature_group_name: Source feature group (version 1).
            features: Feature names to select from the group.

        Returns:
            The newly created feature view.

        Raises:
            CustomException: If the source group is missing or creation fails.
        """
        try:
            existing_fv = None
            try:
                existing_fv = self.feature_store.get_feature_view(name=name, version=1)
            except Exception as lookup_error:
                logger.warning(f"No existing Feature View to delete: {lookup_error}")

            if existing_fv is not None:
                try:
                    existing_fv.delete()
                    logger.info(f"Deleted existing Feature View '{name}' for clean recreation.")
                except Exception as delete_error:
                    # Kept best-effort, but reported as what it is: a failed delete.
                    logger.warning(f"Could not delete existing Feature View '{name}': {delete_error}")

            fg = self.feature_store.get_feature_group(name=feature_group_name, version=1)
            if fg is None:
                raise ValueError(
                    f"Feature Group '{feature_group_name}' (version 1) not found; "
                    f"cannot create Feature View '{name}'."
                )
            query = fg.select(features)
            fv = self.feature_store.create_feature_view(
                name=name,
                version=1,
                query=query,
                description=f"Feature View for {feature_group_name}"
            )
            logger.info(f"Feature View '{name}' created successfully.")
            return fv

        except Exception as e:
            raise CustomException(e, sys)

    def save_training_dataset(self, feature_view_name, version_description, output_format="csv"):
        """Materialise a training dataset from feature view ``feature_view_name`` (version 1).

        Args:
            feature_view_name: Name of the feature view to materialise.
            version_description: Description stored with the training dataset.
            output_format: Data format passed to ``create_training_data``.

        Returns:
            Whatever ``create_training_data`` returns.

        Raises:
            CustomException: If the view lookup or the materialisation fails.
        """
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=1)
            td = fv.create_training_data(
                description=version_description,
                data_format=output_format,
                write_options={"wait_for_job": True}
            )
            logger.info(f"Training dataset version for Feature View '{feature_view_name}' created.")
            return td
        except Exception as e:
            raise CustomException(e, sys)

    def load_latest_training_dataset(self, feature_view_name):
        """Return ``training_data()`` of feature view ``feature_view_name`` (version 1).

        NOTE(review): nothing here selects a specific "latest" training-dataset
        version; confirm that ``training_data()`` has the intended semantics.

        Raises:
            CustomException: If the view lookup or the call fails.
        """
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=1)
            return fv.training_data()
        except Exception as e:
            raise CustomException(e, sys)

    def get_online_features(self, feature_view_name, key_dict: dict):
        """Fetch online features from feature view ``feature_view_name`` (version 1).

        Args:
            feature_view_name: Name of the feature view to query.
            key_dict: Lookup keys, passed unchanged to the view's ``get_online_features``.

        Raises:
            CustomException: If the view lookup or the query fails.
        """
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=1)
            return fv.get_online_features(key_dict)
        except Exception as e:
            raise CustomException(e, sys)

    def upload_file_safely(self, local_path: str, target_name: str):
        """Upload a local file to ``Resources/wattpredictor_artifacts/<target_name>``, overwriting.

        Args:
            local_path: File to upload; ``str`` or path-like (converted with ``str()``).
            target_name: File name to use under the artifacts folder.

        Raises:
            CustomException: If the upload fails.
        """
        try:
            self.dataset_api.upload(
                str(local_path),
                f"Resources/wattpredictor_artifacts/{target_name}",
                overwrite=True
            )
            logger.info(f"Uploaded file to Feature Store: {target_name}")
        except Exception as e:
            raise CustomException(e, sys)

In [8]:
import os
import sys
import json
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import LabelEncoder
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
from WattPredictor.utils.helpers import create_directories, save_bin
from WattPredictor.utils.exception import CustomException
from WattPredictor import logger


class DataTransformation:
    """Builds the engineered electricity-demand features and publishes them to the feature store."""

    # The config annotations are quoted (forward references) because those
    # classes are defined in an earlier notebook cell; quoting keeps this class
    # definable regardless of cell order.
    def __init__(self, config: "DataTransformationConfig", feature_store_config: "FeatureStoreConfig"):
        """Store the stage config and open a feature store connection.

        Args:
            config: Paths and parameters for this stage.
            feature_store_config: Connection settings handed to ``FeatureStore``.
        """
        self.config = config
        self.feature_store = FeatureStore(feature_store_config)

    def check_status(self):
        """Return the ``validation_status`` flag from the JSON status file.

        Returns ``False`` when the key is absent or the file cannot be read or
        parsed (the failure is logged as a warning).
        """
        try:
            with open(self.config.status_file, 'r') as f:
                status_data = json.load(f)
            return status_data.get("validation_status", False)
        except Exception as e:
            logger.warning(f"Validation status check failed: {e}")
            return False

    def basic_preprocessing(self) -> pd.DataFrame:
        """Read the raw feature group and label-encode the sub-region column.

        Reads version 1 of the ``elec_wx_demand`` feature group and encodes
        ``subba`` into ``sub_region_code``. The fitted encoder is saved to
        ``config.label_encoder`` and uploaded as ``label_encoder.pkl``.

        Returns:
            A frame with columns ``date``, ``sub_region_code``, ``demand``
            (renamed from ``value``) and ``temperature_2m``.

        Raises:
            CustomException: If reading, encoding, saving or uploading fails.
        """
        try:
            fg = self.feature_store.feature_store.get_feature_group(name="elec_wx_demand", version=1)
            raw = fg.read()
            # .copy(): the assignment below must act on an independent frame,
            # not on a slice of `raw` (avoids SettingWithCopy problems).
            df = raw[['date', 'subba', 'value', 'temperature_2m']].copy()

            le = LabelEncoder()
            df['sub_region_code'] = le.fit_transform(df['subba'])
            df = df.rename(columns={'value': 'demand'})
            df = df[['date', 'sub_region_code', 'demand', 'temperature_2m']].copy()

            create_directories([os.path.dirname(self.config.label_encoder)])
            save_bin(le, self.config.label_encoder)
            self.feature_store.upload_file_safely(self.config.label_encoder, "label_encoder.pkl")

            logger.info("Label encoding and preprocessing complete.")
            return df
        except Exception as e:
            raise CustomException(e, sys)

    def feature_engineering(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add calendar features and push the result to the ``elec_wx_features`` feature group.

        Works on a copy, so the caller's frame is left untouched. Adds ``hour``,
        ``day_of_week``, ``month``, ``is_weekend`` and ``is_holiday``; ``date``
        is converted to timezone-aware UTC.

        ``is_holiday`` is 1 for every row whose UTC calendar day is a US
        federal holiday, not only the row stamped exactly at midnight.
        NOTE(review): the day is taken in UTC; confirm whether a regional
        local-time calendar day is wanted instead.

        Args:
            df: Frame with at least a ``date`` column.

        Returns:
            The new frame with the engineered columns.

        Raises:
            CustomException: If feature creation or the feature store write fails.
        """
        try:
            df = df.copy()

            # Convert date to datetime if not already
            df['date'] = pd.to_datetime(df['date'], utc=True)

            # Create time-based features
            df['hour'] = df['date'].dt.hour
            df['day_of_week'] = df['date'].dt.dayofweek
            df['month'] = df['date'].dt.month
            df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)

            # Create holiday feature: compare calendar days, not full timestamps.
            # The holiday calendar yields midnight dates, so matching the raw
            # hourly timestamps could only ever flag the 00:00 row of a holiday.
            utc_days = df['date'].dt.tz_localize(None).dt.normalize()
            holidays = calendar().holidays(start=utc_days.min(), end=utc_days.max())
            # Truncating both sides to day precision also sidesteps datetime
            # unit (ns/us) mismatches between the two arrays.
            holiday_mask = np.isin(
                utc_days.to_numpy().astype("datetime64[D]"),
                holidays.to_numpy().astype("datetime64[D]"),
            )
            df['is_holiday'] = pd.Series(holiday_mask, index=df.index).astype(int)

            # Debug information
            logger.info(f"DataFrame shape: {df.shape}")
            logger.info(f"DataFrame columns: {df.columns.tolist()}")
            logger.info(f"Date column type: {df['date'].dtype}")
            logger.info(f"Sample data:\n{df.head()}")

            self.feature_store.create_or_update_feature_group(
                name="elec_wx_features",
                df=df,
                primary_key=["sub_region_code"],
                event_time="date",
                description="Engineered electricity demand features",
                online=True
            )

            logger.info("Feature group created and feature engineering complete.")
            return df
        except Exception as e:
            raise CustomException(e, sys)

    def transform(self):
        """Run the full stage: preprocess, engineer features, create the view and training dataset.

        Returns:
            The engineered frame sorted by ``date``.

        Raises:
            CustomException: If validation has not passed or any step fails.
        """
        if not self.check_status():
            raise CustomException("Validation failed. Aborting transformation.", sys)
        try:
            df = self.feature_engineering(self.basic_preprocessing())
            df = df.sort_values("date")

            self.feature_store.create_feature_view(
                name="elec_wx_features_view",
                feature_group_name="elec_wx_features",
                features=[
                    "date", "sub_region_code", "demand", "temperature_2m",
                    "hour", "day_of_week", "month", "is_weekend", "is_holiday"
                ]
            )

            self.feature_store.save_training_dataset(
                feature_view_name="elec_wx_features_view",
                version_description="initial training dataset with all features",
                output_format="csv"
            )

            logger.info("Feature view + training dataset saved successfully.")
            return df
        except Exception as e:
            raise CustomException(e, sys)

In [9]:
# Pipeline driver: load the configuration, build the transformation stage and run it.
# The names assigned here (config, data_transformation, df, ...) are left in the
# notebook's global namespace.
try:
    # Reads the YAML files using the default paths.
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    # Reads the API key from the `hopsworks_api_key` environment variable.
    feature_store_config = config.get_feature_store_config()
    # Constructing DataTransformation connects to Hopsworks immediately.
    data_transformation = DataTransformation(config=data_transformation_config,feature_store_config=feature_store_config)
    # Engineered feature frame returned by the stage.
    df= data_transformation.transform()

except Exception as e:
    # Any failure above is re-raised as a CustomException built from the error's text.
    raise CustomException(str(e), sys)

[2025-07-12 17:32:07,851: INFO: helpers: yaml file: config_file\config.yaml loaded successfully]
[2025-07-12 17:32:07,855: INFO: helpers: yaml file: config_file\params.yaml loaded successfully]
[2025-07-12 17:32:07,855: INFO: helpers: yaml file: config_file\schema.yaml loaded successfully]
[2025-07-12 17:32:07,861: INFO: helpers: created directory at: artifacts]
[2025-07-12 17:32:07,862: INFO: helpers: created directory at: artifacts/data_transformation]
[2025-07-12 17:32:07,864: INFO: external: Initializing external client]
[2025-07-12 17:32:07,865: INFO: external: Base URL: https://c.app.hopsworks.ai:443]
To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'
]






[2025-07-12 17:32:10,535: INFO: python: Python Engine initialized.]

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1237149
[2025-07-12 17:32:12,286: INFO: 1554228661: Connected to Hopsworks Feature Store: JavithNaseem]
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (3.47s) 
[2025-07-12 17:32:18,881: INFO: helpers: created directory at: artifacts\data_transformation]
[2025-07-12 17:32:18,883: INFO: helpers: binary file saved at: artifacts\data_transformation\label_encoder.pkl]


Uploading f:\WattPredictor\artifacts\data_transformation\label_encoder.pkl: 0.000%|          | 0/549 elapsed<0…

[2025-07-12 17:32:21,525: INFO: 1554228661: Uploaded file to Feature Store: label_encoder.pkl]
[2025-07-12 17:32:21,533: INFO: 1405442928: Label encoding and preprocessing complete.]
[2025-07-12 17:32:21,586: INFO: 1405442928: DataFrame shape: (39281, 9)]
[2025-07-12 17:32:21,589: INFO: 1405442928: DataFrame columns: ['date', 'sub_region_code', 'demand', 'temperature_2m', 'hour', 'day_of_week', 'month', 'is_weekend', 'is_holiday']]
[2025-07-12 17:32:21,592: INFO: 1405442928: Date column type: datetime64[us, UTC]]
[2025-07-12 17:32:21,592: INFO: 1405442928: Sample data:
                       date  sub_region_code demand  temperature_2m  hour  \
0 2025-02-07 18:00:00+00:00                4   1033           2.146    18   
1 2025-02-10 06:00:00+00:00                0   1647          -1.704     6   
2 2025-04-07 01:00:00+00:00                2   1752          10.746     1   
3 2025-02-14 20:00:00+00:00                4    966           0.296    20   
4 2025-02-16 18:00:00+00:00            

Uploading Dataframe: 100.00% |██████████| Rows 39281/39281 | Elapsed Time: 00:03 | Remaining Time: 00:00


Launching job: elec_wx_features_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1237149/jobs/named/elec_wx_features_1_offline_fg_materialization/executions
[2025-07-12 17:32:44,625: INFO: execution_engine: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED]
[2025-07-12 17:32:47,925: INFO: execution_engine: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED]
[2025-07-12 17:34:36,295: INFO: execution_engine: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED]
[2025-07-12 17:34:36,567: INFO: execution_engine: Waiting for log aggregation to finish.]
[2025-07-12 17:34:56,143: INFO: execution_engine: Execution finished successfully.]
[2025-07-12 17:34:56,147: INFO: 1554228661: Data successfully inserted into existing Feature Group 'elec_wx_features'.]
[2025-07-12 17:34:56,151: INFO: 1405442928: Feature group created and f