In [1]:
import os
import sys

In [2]:
os.chdir('../')

In [3]:
sys.path.append(os.path.join(os.getcwd(), "src"))

In [4]:
from dataclasses import dataclass
from pathlib import Path
from WattPredictor.utils.helpers import *
from WattPredictor.utils.exception import *
from WattPredictor.constants import *
from WattPredictor import logger

In [5]:
@dataclass
class DataTransformationConfig:
    root_dir: Path
    data_file: Path
    status_file: str
    label_encoder: Path
    train_features: Path
    test_features: Path
    input_seq_len: int
    step_size: int
    cutoff_date: str

@dataclass(frozen=True)
class FeatureStoreConfig:
    hopsworks_project_name: str
    hopsworks_api_key: str

In [6]:
class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_PATH,
                       params_filepath=PARAMS_PATH,
                       schema_filepath=SCHEMA_PATH):
        
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        config = self.config.data_transformation
        schema = self.schema
        params = self.params.transformation

        create_directories([config.root_dir])

        data_transformation_config = DataTransformationConfig(
            root_dir=Path(config.root_dir),
            data_file=Path(config.data_file),
            status_file=Path(config.status_file),
            label_encoder=Path(config.label_encoder),
            train_features=Path(config.train_features),
            test_features=Path(config.test_features),
            input_seq_len=params.input_seq_len,
            step_size=params.step_size,
            cutoff_date=params.cutoff_date
        )

        return data_transformation_config
    
    def get_feature_store_config(self) -> FeatureStoreConfig:

        config = self.config.feature_store

        feature_store_config = FeatureStoreConfig(
                hopsworks_project_name=config.hopsworks_project_name,
                hopsworks_api_key=os.environ['hopsworks_api_key'],
        )

        return feature_store_config

In [7]:
import hopsworks
import pandas as pd
import sys
import os
from WattPredictor.utils.exception import CustomException
from WattPredictor import logger

class FeatureStore:
    def __init__(self, config):
        try:
            self.config = config
            self.connect()
        except Exception as e:
            raise CustomException(e, sys)

    def connect(self):
        try:
            self.project = hopsworks.login(
                project=self.config.hopsworks_project_name,
                api_key_value=self.config.hopsworks_api_key
            )
            self.feature_store = self.project.get_feature_store()
            self.dataset_api = self.project.get_dataset_api()
            logger.info(f"Connected to Hopsworks Feature Store: {self.config.hopsworks_project_name}")
        except Exception as e:
            raise CustomException(e, sys)

    def create_feature_group(self, name, df, primary_key, event_time, description):
        try:
            try:
                fg = self.feature_store.get_feature_group(name=name, version=1)
                logger.info(f"Feature Group '{name}' already exists. Inserting data instead.")
                fg.insert(df)
            except:
                logger.info(f"Feature Group '{name}' does not exist. Creating new one.")
                fg = self.feature_store.get_or_create_feature_group(
                    name=name,
                    version=1,
                    primary_key=primary_key,
                    event_time=event_time,
                    description=description,
                    online_enabled=False
                )
                fg.save(df)

            logger.info(f"Feature Group '{name}' created/updated successfully")

        except Exception as e:
            raise CustomException(e, sys)

    def create_feature_view(self, name: str, feature_group_name: str, features: list):
        try:
            fg = self.feature_store.get_feature_group(name=feature_group_name, version=1)
            fv = self.feature_store.get_or_create_feature_view(
                name=name,
                version=1,
                query=fg.select(features),
                description=f"Feature View for {name}"
            )
            logger.info(f"Feature View '{name}' created successfully")
        except Exception as e:
            raise CustomException(e, sys)

    def upload_file_safely(self, local_path: str, target_name: str):
        """
        Upload file to Hopsworks dataset storage.
        If it already exists, it will be overwritten.
        """
        try:
            self.dataset_api.upload(
                local_path,
                f"Resources/wattpredictor_artifacts/{target_name}",
                overwrite=True 
            )
            logger.info(f"Uploaded file to Feature Store: {target_name}")
        except Exception as e:
            raise CustomException(e, sys)

    def delete_file(self, target_name: str):
        """
        Delete file from Hopsworks dataset storage.
        Only use this if you want to clean up files manually.
        """
        try:
            full_path = f"Resources/wattpredictor_artifacts/{target_name}"
            self.dataset_api.delete(full_path)
            logger.warning(f"Deleted file from Feature Store: {target_name}")
        except Exception as e:
            logger.warning(f"File not found or already deleted: {target_name}")
            # Not raising exception here to allow safe cleanup

    def get_training_data(self, feature_view_name: str):
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=1)
            X, y = fv.training_data()
            logger.info(f"Retrieved training data from Feature View '{feature_view_name}'")
            return X, y
        except Exception as e:
            raise CustomException(e, sys)

In [8]:
import os
import sys
import json
import joblib
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
from sklearn.preprocessing import LabelEncoder, StandardScaler
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar

from WattPredictor.utils.helpers import create_directories, save_bin
from WattPredictor.utils.exception import CustomException
from WattPredictor import logger


class DataTransformation:
    def __init__(self, config, feature_store_config):
        self.config = config
        self.feature_store_config = feature_store_config
        self.feature_store = FeatureStore(feature_store_config)

    def check_status(self):
        try:
            with open(self.config.status_file, 'r') as f:
                status_data = json.load(f)
            return status_data.get("validation_status", False)
        except Exception as e:
            logger.warning(f"Validation status error: {e}")
            return False

    def basic_preprocessing(self):
        try:
            fg = self.feature_store.feature_store.get_feature_group(name="elec_wx_demand", version=1)
            df = fg.read()
            df = df[['date', 'subba', 'value', 'temperature_2m']]

            le = LabelEncoder()
            df['sub_region_code'] = le.fit_transform(df['subba'])
            df.rename(columns={'subba': 'sub_region', 'value': 'demand'}, inplace=True)
            df = df[['date', 'sub_region_code', 'demand', 'temperature_2m']]

            create_directories([os.path.dirname(self.config.label_encoder)])
            save_bin(le, self.config.label_encoder)

            self.feature_store.upload_file_safely(
                self.config.label_encoder,
                os.path.basename(self.config.label_encoder)
            )

            logger.info("Basic preprocessing complete.")
            return df

        except Exception as e:
            raise CustomException(e, sys)

    def feature_engineering(self, df: pd.DataFrame):
        try:
            df['date'] = pd.to_datetime(df['date'], utc=True)
            df['hour'] = df['date'].dt.hour
            df['day_of_week'] = df['date'].dt.dayofweek
            df['month'] = df['date'].dt.month
            df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)

            holidays = calendar().holidays(start=df['date'].min(), end=df['date'].max())
            df['is_holiday'] = df['date'].isin(holidays).astype(int)

            self.feature_store.create_feature_group(
                name="elec_wx_features",
                df=df,
                primary_key=["date", "sub_region_code"],
                event_time="date",
                description="Engineered features for electricity demand forecasting"
            )
            logger.info("Feature engineering complete.")
            return df

        except Exception as e:
            raise CustomException(e, sys)

    def transform(self):
        if not self.check_status():
            raise CustomException("Validation failed. Aborting transformation.", sys)
        try:
            df = self.feature_engineering(self.basic_preprocessing())
            df.sort_values("date", inplace=True)

            cutoff = pd.to_datetime(self.config.cutoff_date, utc=True)
            train_df = df[df['date'] < cutoff].reset_index(drop=True)
            test_df = df[df['date'] >= cutoff].reset_index(drop=True)
            train_df.to_csv(self.config.train_features, index=False)
            test_df.to_csv(self.config.test_features, index=False)

            self.feature_store.upload_file_safely(self.config.train_features, "train_df.csv")
            self.feature_store.upload_file_safely(self.config.test_features, "test_df.csv")

            return train_df, test_df

        except Exception as e:
            raise CustomException(e, sys)

In [9]:
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    feature_store_config = config.get_feature_store_config()
    data_transformation = DataTransformation(config=data_transformation_config,feature_store_config=feature_store_config)
    train_df, test_df = data_transformation.transform()

except Exception as e:
    raise CustomException(str(e), sys)

[2025-07-11 14:48:38,210: INFO: helpers: yaml file: config_file\config.yaml loaded successfully]
[2025-07-11 14:48:38,215: INFO: helpers: yaml file: config_file\params.yaml loaded successfully]
[2025-07-11 14:48:38,219: INFO: helpers: yaml file: config_file\schema.yaml loaded successfully]
[2025-07-11 14:48:38,220: INFO: helpers: created directory at: artifacts]
[2025-07-11 14:48:38,221: INFO: helpers: created directory at: artifacts/data_transformation]
[2025-07-11 14:48:38,224: INFO: external: Initializing external client]
[2025-07-11 14:48:38,225: INFO: external: Base URL: https://c.app.hopsworks.ai:443]
[2025-07-11 14:48:40,955: INFO: python: Python Engine initialized.]

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1237149
[2025-07-11 14:48:43,855: INFO: 385296184: Connected to Hopsworks Feature Store: JavithNaseem]
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (108.46s) 
[2025-07-11 14:50:36,039: INFO: helpers: created direc

Uploading f:\WattPredictor\artifacts\data_transformation\label_encoder.pkl: 0.000%|          | 0/549 elapsed<0…

[2025-07-11 14:50:39,044: INFO: 385296184: Uploaded file to Feature Store: label_encoder.pkl]
[2025-07-11 14:50:39,047: INFO: 68799173: Basic preprocessing complete.]
[2025-07-11 14:50:39,360: INFO: 385296184: Feature Group 'elec_wx_features' already exists. Inserting data instead.]
[2025-07-11 14:50:39,361: INFO: 385296184: Feature Group 'elec_wx_features' does not exist. Creating new one.]
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1237149/fs/1220685/fg/1495377


Uploading Dataframe: 100.00% |██████████| Rows 39281/39281 | Elapsed Time: 00:07 | Remaining Time: 00:00


Launching job: elec_wx_features_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1237149/jobs/named/elec_wx_features_1_offline_fg_materialization/executions
[2025-07-11 14:51:01,188: INFO: 385296184: Feature Group 'elec_wx_features' created/updated successfully]
[2025-07-11 14:51:01,190: INFO: 68799173: Feature engineering complete.]


Uploading f:\WattPredictor\artifacts\data_transformation\train_features.csv: 0.000%|          | 0/999134 elaps…

[2025-07-11 14:51:06,177: INFO: 385296184: Uploaded file to Feature Store: train_df.csv]


Uploading f:\WattPredictor\artifacts\data_transformation\test_features.csv: 0.000%|          | 0/1025674 elaps…

[2025-07-11 14:51:09,568: INFO: 385296184: Uploaded file to Feature Store: test_df.csv]
