In [1]:
import os
os.chdir("..")

In [3]:
#!/usr/bin/env python3
"""
Example usage of DiCE Explainer for electricity consumption reduction
"""

import json
import sys

from src.dice_explainer import DiceExplainer

# Initialize DiCE Explainer
print("=" * 80)
print("DiCE EXPLAINER - ELECTRICITY CONSUMPTION REDUCTION")
print("=" * 80)

explainer = DiceExplainer()

# Example: Building with high electricity consumption
print("\n" + "=" * 80)
print("EXAMPLE: Building with High Consumption")
print("=" * 80)

json_data = {
    'time': '2016-01-01T21:00:00',
    'building_id': 'Bear_education_Sharon',
    'site_id': 'Bear',
    'primaryspaceusage': 'Education',
    'sub_primaryspaceusage': 'Education',
    'sqm': 5261.7,
    'yearbuilt': 1953,
    'numberoffloors': 5,
    'occupants': 200,  # High number of occupants
    'timezone': 'US/Pacific',
    'airTemperature': 25.0,  # High temperature (needs cooling)
    'cloudCoverage': 30.0,
    'dewTemperature': 18.0,
    'windSpeed': 2.6,
    'seaLvlPressure': 1020.7,
    'precipDepth1HR': 0.0
}

# First, check current prediction
current_pred = explainer.inference.predict(json_data)
print(f"\nüìä Current predicted consumption: {current_pred:.2f} kWh")

DiCE EXPLAINER - ELECTRICITY CONSUMPTION REDUCTION
‚úÖ Loaded model from: output/models/xgboost_wrapped_dice.pkl
‚úÖ Loaded 5 label encoders
‚úÖ Loaded model info: XGBoost
   - Test R¬≤: 0.9843
   - Test RMSE: 30.30 kWh
‚úÖ Loaded historical data: (25187366, 61)
üîß Setting up DiCE...
   ‚ö†Ô∏è  Fixing hour_sin: 229 very small values (scientific notation issue)
   ‚ö†Ô∏è  Fixing hour_cos: 427 very small values (scientific notation issue)
   ‚ö†Ô∏è  Fixing month_sin: 820 very small values (scientific notation issue)
   ‚ö†Ô∏è  Fixing month_cos: 871 very small values (scientific notation issue)
‚úÖ DiCE setup complete!

EXAMPLE: Building with High Consumption

üìä Current predicted consumption: 87.87 kWh


In [26]:
current_prediction = explainer.inference.predict(json_data, include_lag=True)

In [5]:
result = explainer.generate_recommendations(
    json_data=json_data,
    threshold=50.0,
    total_cfs=5,
    method='genetic'  # Use 'genetic' for better results, 'random' for faster
)

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  4.14it/s]


In [8]:
X = explainer.inference._preprocess_input(json_data, include_lag=True)
X

Unnamed: 0,sqm,yearbuilt,numberoffloors,occupants,airTemperature,cloudCoverage,dewTemperature,windSpeed,seaLvlPressure,precipDepth1HR,...,electricity_lag1,electricity_lag24,electricity_lag168,electricity_rolling_mean_24h,electricity_rolling_std_24h,electricity_rolling_mean_7d,primaryspaceusage,sub_primaryspaceusage,site_id,timezone
0,5261.7,1953.0,5.0,200.0,25.0,30.0,18.0,2.6,1020.7,0.0,...,94.75,0.0,0.0,98.125,3.802999,98.125,0,21,0,5


In [9]:
query_instance = explainer._prepare_query_instance(json_data, X)

In [10]:
query_instance

Unnamed: 0,sqm,yearbuilt,numberoffloors,occupants,airTemperature,cloudCoverage,dewTemperature,windSpeed,seaLvlPressure,precipDepth1HR,...,electricity_lag24,electricity_lag168,electricity_rolling_mean_24h,electricity_rolling_std_24h,electricity_rolling_mean_7d,primaryspaceusage,sub_primaryspaceusage,site_id,timezone,electricity_consumption
0,5261.7,1953.0,5.0,200.0,25.0,30.0,18.0,2.6,1020.7,0.0,...,0.0,0.0,98.125,3.802999,98.125,Education,Education,Bear,US/Pacific,87.868225


In [11]:
time_categorical_features = ['hour', 'day_of_week', 'month', 'year', 'is_weekend']
for col in time_categorical_features:
    if col in query_instance.columns and col not in explainer.dice_data.continuous_feature_names:
        # DiCE treats this as categorical - ensure it's an integer string with object dtype
        try:
            val = query_instance[col].iloc[0]
            # Convert to int then string to avoid '0.0'
            if isinstance(val, str):
                # If it's already a string, check if it has decimal
                if '.' in val:
                    int_val = int(float(val))
                    query_instance[col] = str(int_val)
                # Already an integer string, keep it
            else:
                # Convert numeric to integer string
                int_val = int(float(val))
                query_instance[col] = str(int_val)
            
            # CRITICAL: Set dtype to object (string) to prevent float conversion
            query_instance[col] = query_instance[col].astype('object')
        except (ValueError, TypeError):
            # If conversion fails, use original value from json_data
            if col in json_data:
                try:
                    int_val = int(float(json_data[col]))
                    query_instance[col] = str(int_val)
                    query_instance[col] = query_instance[col].astype('object')
                except:
                    pass

    # Get permitted ranges for actionable features
    permitted_range = explainer._get_permitted_ranges(query_instance, json_data)
    
    # Remove target column from query instance (DiCE doesn't want it)
    query_instance_for_dice = query_instance.drop(columns=['electricity_consumption'], errors='ignore')
    

In [12]:
permitted_range

{'sqm': [3683.1899999999996, 5261.7],
 'occupants': [100.0, 200.0],
 'airTemperature': [20.0, 30.0]}

In [13]:
query_instance_for_dice

Unnamed: 0,sqm,yearbuilt,numberoffloors,occupants,airTemperature,cloudCoverage,dewTemperature,windSpeed,seaLvlPressure,precipDepth1HR,...,electricity_lag1,electricity_lag24,electricity_lag168,electricity_rolling_mean_24h,electricity_rolling_std_24h,electricity_rolling_mean_7d,primaryspaceusage,sub_primaryspaceusage,site_id,timezone
0,5261.7,1953.0,5.0,200.0,25.0,30.0,18.0,2.6,1020.7,0.0,...,94.75,0.0,0.0,98.125,3.802999,98.125,Education,Education,Bear,US/Pacific


In [14]:
cf_params = {
    'query_instances': query_instance_for_dice,
    'total_CFs': 5,
    'desired_range': [0, 40.0],
    'permitted_range': permitted_range
}

In [16]:
counterfactuals = explainer.explainer.generate_counterfactuals(**cf_params)
counterfactuals

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  4.35it/s]


<dice_ml.counterfactual_explanations.CounterfactualExplanations at 0x13c18ac10>

In [19]:
cf_example = counterfactuals.cf_examples_list[0]

In [22]:
cf_df = cf_example.final_cfs_df

In [27]:
# Get predictions for counterfactuals
cf_predictions = []
recommendations = []

for idx, cf_row in cf_df.iterrows():
    # Convert counterfactual back to dict format
    cf_dict = cf_row.to_dict()
    
    # Predict consumption for this counterfactual
    # Need to prepare it properly for prediction
    cf_data = json_data.copy()
    
    # Update with counterfactual values
    for feat, value in cf_dict.items():
        if feat != 'electricity_consumption' and feat in cf_data:
            # Handle different naming conventions
            if feat in cf_data:
                cf_data[feat] = value
            elif feat.replace('_', '') in [k.replace('_', '') for k in cf_data.keys()]:
                # Find matching key
                for key in cf_data.keys():
                    if key.replace('_', '') == feat.replace('_', ''):
                        cf_data[key] = value
                        break
    
    # Predict
    try:
        cf_pred = explainer.inference.predict(cf_data, include_lag=False)
        cf_predictions.append(cf_pred)
        
        # Calculate changes
        changes = explainer._calculate_changes(json_data, cf_dict, current_prediction, cf_pred)
        
        recommendations.append({
            'counterfactual_id': idx,
            'predicted_consumption': float(cf_pred),
            'reduction': float(current_prediction - cf_pred),
            'reduction_pct': float((current_prediction - cf_pred) / current_prediction * 100),
            'below_threshold': cf_pred <= 40.0,
            'changes': changes
        })
    except Exception as e:
        print(f"‚ö†Ô∏è  Error predicting counterfactual {idx}: {e}")
        continue

In [28]:
cf_predictions

[np.float32(1.2340472),
 np.float32(1.2631855),
 np.float32(1.3781884),
 np.float32(1.2361456),
 np.float32(1.5567492)]

In [29]:
recommendations

[{'counterfactual_id': 0,
  'predicted_consumption': 1.2340471744537354,
  'reduction': 86.6341781616211,
  'reduction_pct': 98.59557342529297,
  'below_threshold': np.True_,
  'changes': [{'feature': 'sqm',
    'description': 'Building area (square meters)',
    'original_value': 5261.7,
    'suggested_value': 4698.8,
    'change': -562.8999999999996,
    'change_pct': -10.698063363551698,
    'action': 'Reduce building area by 563 sqm (10.7%)'}]},
 {'counterfactual_id': 0,
  'predicted_consumption': 1.2631855010986328,
  'reduction': 86.60504150390625,
  'reduction_pct': 98.56241607666016,
  'below_threshold': np.True_,
  'changes': [{'feature': 'sqm',
    'description': 'Building area (square meters)',
    'original_value': 5261.7,
    'suggested_value': 5234.4,
    'change': -27.300000000000182,
    'change_pct': -0.5188437197103632,
    'action': 'Reduce building area by 27 sqm (0.5%)'}]},
 {'counterfactual_id': 0,
  'predicted_consumption': 1.3781883716583252,
  'reduction': 86.4