In [1]:
import os
import sys
# Step up to the parent directory and expose its ./src folder for imports.
# Both steps are guarded: without the guards, every re-run of this cell would
# climb one directory further and append another duplicate entry to sys.path.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()  # remembered so the chdir happens once per kernel
    os.chdir('../')

_SRC_DIR = os.path.join(os.getcwd(), "src")
if _SRC_DIR not in sys.path:
    sys.path.append(_SRC_DIR)

In [2]:
from dataclasses import dataclass
from pathlib import Path
from WattPredictor.utils.helpers import *
from WattPredictor.utils.exception import *
from WattPredictor.constants import *
from WattPredictor.utils.logging import logger

In [3]:
@dataclass(frozen=True)
class DataDriftConfig:
    """Settings for a single drift-detection run.

    Frozen, matching FeatureStoreConfig in this notebook, so a config object
    cannot be altered by accident after ConfigurationManager has built it.
    """

    # Inclusive bounds of the reference window; compared against the 'date'
    # column of the feature data by DriftDetector.
    baseline_start: str
    baseline_end: str
    # Inclusive bounds of the window that is checked against the baseline.
    current_start: str
    current_end: str
    # Directory that receives drift_report.html and drift_report.json.
    report_dir: Path


@dataclass(frozen=True)
class FeatureStoreConfig:
    """Credentials and project name used to log in to Hopsworks."""

    # Name of the Hopsworks project to log in to.
    hopsworks_project_name: str
    # Secret API key; deliberately excluded from repr() below.
    hopsworks_api_key: str

    def __repr__(self) -> str:
        """Return a repr that masks the API key.

        The auto-generated dataclass repr would print the key verbatim, which
        leaks it into notebook output and logs whenever the config is displayed.
        Because the class defines its own __repr__, @dataclass leaves it alone.
        """
        return (
            f"{type(self).__name__}("
            f"hopsworks_project_name={self.hopsworks_project_name!r}, "
            f"hopsworks_api_key='***')"
        )

In [4]:
class ConfigurationManager:
    """Reads the project's YAML files and builds typed config objects from them."""

    def __init__(self, config_filepath=CONFIG_PATH,
                       params_filepath=PARAMS_PATH,
                       schema_filepath=SCHEMA_PATH):
        """Load the three YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: path to the main config YAML.
            params_filepath: path to the parameters YAML.
            schema_filepath: path to the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_drift_config(self) -> DataDriftConfig:
        """Build the drift-detection config and create its root directory.

        Window bounds come from the ``drift`` section of the params file; the
        report directory comes from the ``data_drift`` section of the config file.
        """
        config = self.config.data_drift
        params = self.params.drift

        create_directories([config.root_dir])

        return DataDriftConfig(
            baseline_start=params.baseline_start,
            baseline_end=params.baseline_end,
            current_start=params.current_start,
            current_end=params.current_end,
            report_dir=Path(config.report_dir),
        )

    def get_feature_store_config(self) -> FeatureStoreConfig:
        """Build the Hopsworks connection config.

        The project name is read from the ``feature_store`` section of the
        config file; the API key is read from the ``hopsworks_api_key``
        environment variable so it never has to live in a file.

        Raises:
            KeyError: if the environment variable is unset or empty.
        """
        config = self.config.feature_store

        api_key = os.environ.get('hopsworks_api_key')
        if not api_key:
            # Same exception type as a plain os.environ[...] lookup, but with a
            # message that says what to do about it.
            raise KeyError(
                "Environment variable 'hopsworks_api_key' is not set; "
                "define it before building the feature store config."
            )

        return FeatureStoreConfig(
            hopsworks_project_name=config.hopsworks_project_name,
            hopsworks_api_key=api_key,
        )

In [5]:
import hopsworks
import pandas as pd
import sys
import os
from WattPredictor.utils.exception import CustomException

class FeatureStore:
    """Convenience wrapper around a Hopsworks project's feature store and dataset API.

    Every public method converts failures into ``CustomException``.
    """

    # Primary keys used for online lookups when the caller supplies none.
    DEFAULT_PRIMARY_KEYS = ("date_str", "sub_region_code")

    def __init__(self, config):
        """Keep ``config`` and log in to Hopsworks straight away.

        Args:
            config: object exposing ``hopsworks_project_name`` and
                ``hopsworks_api_key``.

        Raises:
            CustomException: if the connection cannot be established.
        """
        try:
            self.config = config
            self.connect()
        except CustomException:
            # connect() has already wrapped the failure; wrapping it again
            # would add nothing.
            raise
        except Exception as e:
            raise CustomException(e, sys)

    def connect(self):
        """Log in and cache the project, feature store and dataset API handles.

        Raises:
            CustomException: if the login or any handle lookup fails.
        """
        try:
            self.project = hopsworks.login(
                project=self.config.hopsworks_project_name,
                api_key_value=self.config.hopsworks_api_key
            )
            self.feature_store = self.project.get_feature_store()
            self.dataset_api = self.project.get_dataset_api()
            logger.info(f"Connected to Hopsworks Feature Store: {self.config.hopsworks_project_name}")
        except Exception as e:
            raise CustomException(e, sys)

    def create_feature_group(self, name, df, primary_key, event_time, description, online_enabled=True, version=1):
        """Recreate feature group ``name`` at ``version`` and write ``df`` into it.

        WARNING: destructive. An existing group with the same name and version
        is deleted before the new one is created.

        Args:
            name: feature group name.
            df: data to store in the group.
            primary_key: primary key column name(s), passed through to Hopsworks.
            event_time: event-time column name, passed through to Hopsworks.
            description: free-text description of the group.
            online_enabled: whether the group is also served online.
            version: feature group version.

        Raises:
            CustomException: if creation or the data write fails.
        """
        try:
            # Best-effort lookup: any failure here is treated as "not there",
            # exactly as before, so a lookup error never blocks creation.
            try:
                existing = self.feature_store.get_feature_group(name=name, version=version)
            except Exception:
                existing = None

            if existing is None:
                logger.info(f"Feature Group '{name}' v{version} does not exist. Will create a new one.")
            else:
                logger.info(f"Feature Group '{name}' v{version} exists. Deleting it.")
                try:
                    existing.delete()
                except Exception as delete_error:
                    # Previously a failed delete was reported as "does not
                    # exist"; say what actually happened and carry on.
                    logger.warning(f"Could not delete Feature Group '{name}' v{version}: {delete_error}")

            logger.info(f"Creating Feature Group '{name}' v{version}.")
            fg = self.feature_store.get_or_create_feature_group(
                name=name,
                version=version,
                primary_key=primary_key,
                event_time=event_time,
                description=description,
                online_enabled=online_enabled
            )

            # NOTE(review): newer Hopsworks examples use fg.insert(df) - confirm
            # whether save() is still supported by the installed client.
            fg.save(df)
            logger.info(f"Feature Group '{name}' v{version} created and data inserted.")

        except Exception as e:
            raise CustomException(e, sys)

    def create_feature_view(self, name: str, feature_group_name: str, features: list,
                            version=1, feature_group_version=1):
        """Create (or fetch) a feature view over selected features of a group.

        Args:
            name: feature view name.
            feature_group_name: feature group to select from.
            features: feature names to include in the view.
            version: feature view version.
            feature_group_version: version of the source feature group.

        Raises:
            CustomException: if the group lookup or view creation fails.
        """
        try:
            fg = self.feature_store.get_feature_group(name=feature_group_name, version=feature_group_version)
            self.feature_store.get_or_create_feature_view(
                name=name,
                version=version,
                query=fg.select(features),
                description=f"Feature View for {name}"
            )
            logger.info(f"Feature View '{name}' created successfully")
        except Exception as e:
            raise CustomException(e, sys)

    def save_training_dataset(self, feature_view_name, version_description, output_format="csv", version=1):
        """Materialise a training dataset from a feature view.

        Args:
            feature_view_name: feature view to materialise.
            version_description: description stored with the training dataset.
            output_format: file format of the materialised data.
            version: feature view version.

        Returns:
            Whatever ``create_training_data`` returns for the new dataset.

        Raises:
            CustomException: if the view lookup or the job fails.
        """
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=version)
            td = fv.create_training_data(
                description=version_description,
                data_format=output_format,
                write_options={"wait_for_job": True}
            )
            logger.info(f"Training dataset created for Feature View '{feature_view_name}'.")
            return td
        except Exception as e:
            raise CustomException(e, sys)

    def load_latest_training_dataset(self, feature_view_name, version=1):
        """Return ``training_data()`` of the given feature view.

        Callers in this notebook unpack the result as a two-element tuple.

        Raises:
            CustomException: if the view lookup or the read fails.
        """
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=version)
            return fv.training_data()
        except Exception as e:
            raise CustomException(e, sys)

    def upload_file_safely(self, local_path: str, target_name: str):
        """Upload a local file under ``Resources/wattpredictor_artifacts/``.

        An existing remote file with the same name is overwritten.

        Raises:
            CustomException: if the upload fails.
        """
        try:
            self.dataset_api.upload(
                local_path,
                f"Resources/wattpredictor_artifacts/{target_name}",
                overwrite=True
            )
            logger.info(f"Uploaded file to Feature Store: {target_name}")
        except Exception as e:
            raise CustomException(e, sys)

    def get_training_data(self, feature_view_name: str, version=1):
        """Fetch training data from a feature view as an ``(X, y)`` pair.

        Raises:
            CustomException: if the view lookup or the read fails, or the
                result cannot be unpacked into two values.
        """
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=version)
            X, y = fv.training_data()
            logger.info(f"Retrieved training data from Feature View '{feature_view_name}'")
            return X, y
        except Exception as e:
            raise CustomException(e, sys)

    def get_online_features(self, feature_view_name, key_dict: dict, version=1, primary_keys=None):
        """Fetch one online feature vector for the entity described by ``key_dict``.

        ``get_feature_vector`` is tried first; if it raises, the lookup falls
        back to ``get_serving_vector`` with the key values in primary-key order.

        Args:
            feature_view_name: feature view to query.
            key_dict: mapping of primary key name to value.
            version: feature view version.
            primary_keys: ordered primary key names that ``key_dict`` must
                contain; defaults to ``DEFAULT_PRIMARY_KEYS``.

        Returns:
            The result of whichever lookup succeeded.

        Raises:
            CustomException: if the view is missing, a key is absent from
                ``key_dict``, or both lookups fail.
        """
        try:
            fv = self.feature_store.get_feature_view(name=feature_view_name, version=version)
            if fv is None:
                logger.error(f"[Online Fetch] Feature View '{feature_view_name}' v{version} not found.")
                # A builtin is raised here so the handler below wraps the
                # failure exactly once, from inside an active except block.
                raise LookupError(f"Feature View '{feature_view_name}' v{version} is None")

            expected_primary_keys = list(self.DEFAULT_PRIMARY_KEYS if primary_keys is None else primary_keys)
            missing_keys = [key for key in expected_primary_keys if key not in key_dict]
            if missing_keys:
                raise KeyError(f"key_dict is missing primary key(s): {missing_keys}")
            key_values = [key_dict[key] for key in expected_primary_keys]

            try:
                result = fv.get_feature_vector(key_dict)
                logger.info(f"[Online Fetch] Fetched online features using get_feature_vector for {key_dict}: {result}")
                return result
            except Exception as vector_error:
                logger.warning(f"get_feature_vector failed: {vector_error}, trying get_serving_vector")

                # NOTE(review): confirm the installed client's feature view
                # still exposes get_serving_vector.
                result = fv.get_serving_vector(key_values).to_dict()
                logger.info(f"[Online Fetch] Fetched online features using get_serving_vector for {key_dict}: {result}")
                return result

        except Exception as e:
            logger.error(f"[Online Fetch] Failed to fetch online features for {feature_view_name} with key {key_dict}")
            raise CustomException(e, sys)

In [None]:
import os
import sys
import json
import pandas as pd
from pathlib import Path
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from evidently.metrics import DatasetDriftMetric, ColumnDriftMetric, ColumnSummaryMetric
from WattPredictor.utils.helpers import create_directories
from WattPredictor.utils.exception import CustomException


class DriftDetector:
    """Builds an Evidently drift report comparing two date windows of one feature view."""

    FEATURE_VIEW_NAME = 'elec_wx_features_view'
    TARGET_COLUMN = 'demand'
    PREDICTION_COLUMN = 'prediction'
    # Columns that get their own ColumnDriftMetric when present in the data.
    DRIFT_COLUMNS = ("temperature_2m", "sub_region_code")

    def __init__(self, feature_store_config, config):
        """Connect to the feature store and remember the drift settings.

        Args:
            feature_store_config: settings handed to ``FeatureStore``.
            config: object exposing ``baseline_start``, ``baseline_end``,
                ``current_start``, ``current_end`` and ``report_dir``.
        """
        self.config = config
        self.feature_store = FeatureStore(feature_store_config)

    def _load_features(self):
        """Fetch the feature frame once and parse its 'date' column as UTC.

        The second element returned by the feature store is discarded.

        Raises:
            CustomException: if the fetch or the date parsing fails.
        """
        try:
            df, _ = self.feature_store.load_latest_training_dataset(self.FEATURE_VIEW_NAME)
            df['date'] = pd.to_datetime(df['date'], utc=True)
            return df
        except Exception as e:
            raise CustomException(f"Error loading data from Hopsworks: {e}", sys)

    def _load_data(self, start_date, end_date, features=None):
        """Return the rows whose 'date' lies in ``[start_date, end_date]``.

        Args:
            start_date: inclusive lower bound (anything ``pd.to_datetime`` accepts).
            end_date: inclusive upper bound.
            features: frame from ``_load_features`` to slice; fetched on
                demand when omitted, so existing two-argument calls still work.

        Returns:
            The filtered frame without the ``date_str`` column.

        Raises:
            CustomException: if loading or filtering fails.
        """
        if features is None:
            features = self._load_features()
        try:
            # Normalise the bounds explicitly so strings, dates and datetimes
            # all compare cleanly against the tz-aware 'date' column.
            window_start = pd.to_datetime(start_date, utc=True)
            window_end = pd.to_datetime(end_date, utc=True)
            in_window = (features['date'] >= window_start) & (features['date'] <= window_end)
            return features.loc[in_window].drop(columns=["date_str"], errors="ignore")
        except Exception as e:
            raise CustomException(f"Error loading data from Hopsworks: {e}", sys)

    def _build_column_mapping(self, baseline_df, current_df):
        """Create the ColumnMapping, setting target/prediction only when present."""
        column_mapping = ColumnMapping()
        if self.TARGET_COLUMN in baseline_df.columns:
            column_mapping.target = self.TARGET_COLUMN
        if self.PREDICTION_COLUMN in current_df.columns:
            column_mapping.prediction = self.PREDICTION_COLUMN
        return column_mapping

    def _build_metrics(self, baseline_df):
        """Assemble the metric list, adding per-column metrics only for existing columns."""
        metrics = [DataDriftPreset(), DatasetDriftMetric()]

        # The target summary used to be added unconditionally even though the
        # column mapping treats the target as optional.
        if self.TARGET_COLUMN in baseline_df.columns:
            metrics.append(ColumnSummaryMetric(column_name=self.TARGET_COLUMN))
        else:
            logger.warning(f"Column '{self.TARGET_COLUMN}' not found; skipping its summary metric")

        for column in self.DRIFT_COLUMNS:
            if column in baseline_df.columns:
                metrics.append(ColumnDriftMetric(column_name=column))
        return metrics

    @staticmethod
    def _json_default(obj):
        """Fallback encoder for values ``json`` cannot serialise on its own."""
        if hasattr(obj, 'isoformat'):
            return obj.isoformat()
        if hasattr(obj, 'tolist'):
            return obj.tolist()
        if hasattr(obj, '__dict__'):
            return obj.__dict__
        if pd.isna(obj):
            return None
        return str(obj)

    @staticmethod
    def _extract_dataset_drift(report_dict):
        """Read the dataset-level drift flag out of the report dictionary.

        Uses the first metric entry that is named 'DatasetDriftMetric' or whose
        result carries a 'dataset_drift' key. Returns False when none is found
        or the dictionary has an unexpected shape.
        """
        try:
            for metric in report_dict.get('metrics', []):
                if metric.get('metric') == 'DatasetDriftMetric':
                    return bool(metric.get('result', {}).get('dataset_drift', False))
                if 'dataset_drift' in metric.get('result', {}):
                    return bool(metric['result']['dataset_drift'])
        except (KeyError, TypeError, AttributeError) as e:
            logger.warning(f"Could not extract drift detection result: {e}")
        return False

    def detect(self):
        """Run drift detection and persist the HTML and JSON reports.

        Returns:
            ``(drift_detected, report_dict)``: the dataset-level drift flag and
            the full report as a dictionary.

        Raises:
            CustomException: if loading, report generation or saving fails, or
                if either window contains no rows.
        """
        try:
            # One fetch serves both windows; previously each window triggered
            # its own round-trip to the feature store.
            features = self._load_features()
            baseline_df = self._load_data(self.config.baseline_start, self.config.baseline_end, features)
            current_df = self._load_data(self.config.current_start, self.config.current_end, features)

            if baseline_df.empty or current_df.empty:
                # Raised as a builtin so the handler below wraps it exactly once.
                raise ValueError("Baseline or current data is empty")

            column_mapping = self._build_column_mapping(baseline_df, current_df)
            report = Report(metrics=self._build_metrics(baseline_df))
            report.run(reference_data=baseline_df, current_data=current_df, column_mapping=column_mapping)

            create_directories([self.config.report_dir])
            html_path = self.config.report_dir / "drift_report.html"
            json_path = self.config.report_dir / "drift_report.json"

            report.save_html(str(html_path))
            report_dict = report.as_dict()

            with open(json_path, "w", encoding="utf-8") as f:
                json.dump(report_dict, f, indent=4, default=self._json_default)

            drift_detected = self._extract_dataset_drift(report_dict)

            logger.info(f"Drift Detected: {drift_detected}")
            logger.info(f"Report saved at {html_path}")
            return drift_detected, report_dict

        except Exception as e:
            raise CustomException(f"Drift detection failed: {e}", sys)

    # Original public name, kept so existing ``.Detect()`` calls keep working.
    Detect = detect

]


In [7]:
# Driver: build both configs, then run drift detection.
# NOTE: the DriftDetector cell must be executed in the current kernel before
# this one (use Restart Kernel -> Run All); a stale class definition is what
# produces "'DriftDetector' object has no attribute 'Detect'".
try:
    config = ConfigurationManager()
    drift_config = config.get_data_drift_config()
    feature_store_config = config.get_feature_store_config()
    drift_detector = DriftDetector(config=drift_config, feature_store_config=feature_store_config)
    drift_detected, report_dict = drift_detector.Detect()

except CustomException:
    # Already wrapped by the callee; re-wrapping here would add no context.
    raise
except Exception as e:
    raise CustomException(e, sys)

[2025-07-16 10:27:33,769: INFO: helpers: yaml file: config_file\config.yaml loaded successfully]


[2025-07-16 10:27:33,785: INFO: helpers: yaml file: config_file\params.yaml loaded successfully]
[2025-07-16 10:27:33,800: INFO: helpers: yaml file: config_file\schema.yaml loaded successfully]
[2025-07-16 10:27:33,803: INFO: helpers: created directory at: artifacts]
[2025-07-16 10:27:33,805: INFO: helpers: created directory at: artifacts/data_drift]
[2025-07-16 10:27:33,810: INFO: external: Initializing external client]
[2025-07-16 10:27:33,810: INFO: external: Base URL: https://c.app.hopsworks.ai:443]
To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'
]






[2025-07-16 10:27:41,459: INFO: python: Python Engine initialized.]

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1240214
[2025-07-16 10:27:43,438: INFO: 909816121: Connected to Hopsworks Feature Store: WattPredictor]


CustomException: Exception in C:\Users\Javith Naseem\AppData\Local\Temp\ipykernel_1648\768199110.py, line 6: 'DriftDetector' object has no attribute 'Detect'