In [1]:
import os
os.chdir("..")

In [2]:
#!/usr/bin/env python3
"""
Example usage of DiCE Explainer for electricity consumption reduction
"""

import json
import sys

from src.dice_explainer import DiceExplainer

# Initialize DiCE Explainer
print("=" * 80)
print("DiCE EXPLAINER - ELECTRICITY CONSUMPTION REDUCTION")
print("=" * 80)

explainer = DiceExplainer()

# Example: Building with high electricity consumption
print("\n" + "=" * 80)
print("EXAMPLE: Building with High Consumption")
print("=" * 80)

json_data = {
    'time': '2016-01-01T21:00:00',
    'building_id': 'Bear_education_Sharon',
    'site_id': 'Bear',
    'primaryspaceusage': 'Education',
    'sub_primaryspaceusage': 'Education',
    'sqm': 5261.7,
    'yearbuilt': 1953,
    'numberoffloors': 5,
    'occupants': 200,  # High number of occupants
    'timezone': 'US/Pacific',
    'airTemperature': 25.0,  # High temperature (needs cooling)
    'cloudCoverage': 30.0,
    'dewTemperature': 18.0,
    'windSpeed': 2.6,
    'seaLvlPressure': 1020.7,
    'precipDepth1HR': 0.0
}

# First, check current prediction
current_pred = explainer.inference.predict(json_data)
print(f"\nüìä Current predicted consumption: {current_pred:.2f} kWh")

DiCE EXPLAINER - ELECTRICITY CONSUMPTION REDUCTION
‚úÖ Loaded model from: output/models/xgboost_wrapped_dice.pkl
‚úÖ Loaded 5 label encoders
‚úÖ Loaded model info: XGBoost
   - Test R¬≤: 0.9843
   - Test RMSE: 30.30 kWh
‚úÖ Loaded historical data: (25187366, 61)
üîß Setting up DiCE...
   ‚ö†Ô∏è  Fixing hour_sin: 229 very small values (scientific notation issue)
   ‚ö†Ô∏è  Fixing hour_cos: 427 very small values (scientific notation issue)
   ‚ö†Ô∏è  Fixing month_sin: 820 very small values (scientific notation issue)
   ‚ö†Ô∏è  Fixing month_cos: 871 very small values (scientific notation issue)
‚úÖ DiCE setup complete!

EXAMPLE: Building with High Consumption

üìä Current predicted consumption: 87.87 kWh


In [3]:
current_prediction = explainer.inference.predict(json_data, include_lag=True)

In [4]:
current_prediction

np.float32(87.868225)

In [5]:
result = explainer.generate_recommendations(
    json_data=json_data,
    threshold=50.0,
    total_cfs=5,
    method='genetic'  # Use 'genetic' for better results, 'random' for faster
)

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  4.40it/s]


In [6]:
X = explainer.inference._preprocess_input(json_data, include_lag=True)
X

Unnamed: 0,sqm,yearbuilt,numberoffloors,occupants,airTemperature,cloudCoverage,dewTemperature,windSpeed,seaLvlPressure,precipDepth1HR,...,electricity_lag1,electricity_lag24,electricity_lag168,electricity_rolling_mean_24h,electricity_rolling_std_24h,electricity_rolling_mean_7d,primaryspaceusage,sub_primaryspaceusage,site_id,timezone
0,5261.7,1953.0,5.0,200.0,25.0,30.0,18.0,2.6,1020.7,0.0,...,94.75,0.0,0.0,98.125,3.802999,98.125,0,21,0,5


In [7]:
query_instance = explainer._prepare_query_instance(json_data, X)

In [8]:
query_instance

Unnamed: 0,sqm,yearbuilt,numberoffloors,occupants,airTemperature,cloudCoverage,dewTemperature,windSpeed,seaLvlPressure,precipDepth1HR,...,electricity_lag24,electricity_lag168,electricity_rolling_mean_24h,electricity_rolling_std_24h,electricity_rolling_mean_7d,primaryspaceusage,sub_primaryspaceusage,site_id,timezone,electricity_consumption
0,5261.7,1953.0,5.0,200.0,25.0,30.0,18.0,2.6,1020.7,0.0,...,0.0,0.0,98.125,3.802999,98.125,Education,Education,Bear,US/Pacific,87.868225


In [9]:
time_categorical_features = ['hour', 'day_of_week', 'month', 'year', 'is_weekend']
for col in time_categorical_features:
    if col in query_instance.columns and col not in explainer.dice_data.continuous_feature_names:
        # DiCE treats this as categorical - ensure it's an integer string with object dtype
        try:
            val = query_instance[col].iloc[0]
            # Convert to int then string to avoid '0.0'
            if isinstance(val, str):
                # If it's already a string, check if it has decimal
                if '.' in val:
                    int_val = int(float(val))
                    query_instance[col] = str(int_val)
                # Already an integer string, keep it
            else:
                # Convert numeric to integer string
                int_val = int(float(val))
                query_instance[col] = str(int_val)
            
            # CRITICAL: Set dtype to object (string) to prevent float conversion
            query_instance[col] = query_instance[col].astype('object')
        except (ValueError, TypeError):
            # If conversion fails, use original value from json_data
            if col in json_data:
                try:
                    int_val = int(float(json_data[col]))
                    query_instance[col] = str(int_val)
                    query_instance[col] = query_instance[col].astype('object')
                except:
                    pass

    # Get permitted ranges for actionable features
    permitted_range = explainer._get_permitted_ranges(query_instance, json_data)
    
    # Remove target column from query instance (DiCE doesn't want it)
    query_instance_for_dice = query_instance.drop(columns=['electricity_consumption'], errors='ignore')
    

In [10]:
permitted_range

{'occupants': [100.0, 200.0]}

In [11]:
query_instance_for_dice

Unnamed: 0,sqm,yearbuilt,numberoffloors,occupants,airTemperature,cloudCoverage,dewTemperature,windSpeed,seaLvlPressure,precipDepth1HR,...,electricity_lag1,electricity_lag24,electricity_lag168,electricity_rolling_mean_24h,electricity_rolling_std_24h,electricity_rolling_mean_7d,primaryspaceusage,sub_primaryspaceusage,site_id,timezone
0,5261.7,1953.0,5.0,200.0,25.0,30.0,18.0,2.6,1020.7,0.0,...,94.75,0.0,0.0,98.125,3.802999,98.125,Education,Education,Bear,US/Pacific


In [21]:
cf_params = {
    'query_instances': query_instance_for_dice,
    'total_CFs': 5,
    'desired_range': [0.0, 40.0],
    'permitted_range': permitted_range
}

In [22]:
counterfactuals = explainer.explainer.generate_counterfactuals(**cf_params)
counterfactuals

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  4.23it/s]


<dice_ml.counterfactual_explanations.CounterfactualExplanations at 0x133c62210>

In [23]:
cf_example = counterfactuals.cf_examples_list[0]

In [24]:
cf_df = cf_example.final_cfs_df

In [25]:
# Get predictions for counterfactuals
cf_predictions = []
recommendations = []

for idx, cf_row in cf_df.iterrows():
    # Convert counterfactual back to dict format
    cf_dict = cf_row.to_dict()
    
    # Predict consumption for this counterfactual
    # Need to prepare it properly for prediction
    cf_data = json_data.copy()
    
    # Update with counterfactual values
    for feat, value in cf_dict.items():
        if feat != 'electricity_consumption' and feat in cf_data:
            # Handle different naming conventions
            if feat in cf_data:
                cf_data[feat] = value
            elif feat.replace('_', '') in [k.replace('_', '') for k in cf_data.keys()]:
                # Find matching key
                for key in cf_data.keys():
                    if key.replace('_', '') == feat.replace('_', ''):
                        cf_data[key] = value
                        break
    
    # Predict
    try:
        cf_pred = explainer.inference.predict(cf_data, include_lag=False)
        cf_predictions.append(cf_pred)
        
        # Calculate changes
        changes = explainer._calculate_changes(json_data, cf_dict, current_prediction, cf_pred)
        
        recommendations.append({
            'counterfactual_id': idx,
            'predicted_consumption': float(cf_pred),
            'reduction': float(current_prediction - cf_pred),
            'reduction_pct': float((current_prediction - cf_pred) / current_prediction * 100),
            'below_threshold': cf_pred <= 40.0,
            'changes': changes
        })
    except Exception as e:
        print(f"‚ö†Ô∏è  Error predicting counterfactual {idx}: {e}")
        continue

In [26]:
cf_predictions

[np.float32(1.3681715),
 np.float32(2.0377307),
 np.float32(1.3457264),
 np.float32(1.3751894),
 np.float32(1.3681715)]

In [27]:
recommendations

[{'counterfactual_id': 0,
  'predicted_consumption': 1.3681714534759521,
  'reduction': 86.50005340576172,
  'reduction_pct': 98.44293212890625,
  'below_threshold': np.True_,
  'changes': []},
 {'counterfactual_id': 0,
  'predicted_consumption': 2.0377306938171387,
  'reduction': 85.83049774169922,
  'reduction_pct': 97.68092346191406,
  'below_threshold': np.True_,
  'changes': []},
 {'counterfactual_id': 0,
  'predicted_consumption': 1.3457263708114624,
  'reduction': 86.52249908447266,
  'reduction_pct': 98.46846771240234,
  'below_threshold': np.True_,
  'changes': []},
 {'counterfactual_id': 0,
  'predicted_consumption': 1.3751894235610962,
  'reduction': 86.49303436279297,
  'reduction_pct': 98.4349365234375,
  'below_threshold': np.True_,
  'changes': []},
 {'counterfactual_id': 0,
  'predicted_consumption': 1.3681714534759521,
  'reduction': 86.50005340576172,
  'reduction_pct': 98.44293212890625,
  'below_threshold': np.True_,
  'changes': []}]

In [28]:
result = {
    'success': True,
    'current_prediction': float(current_prediction),
    'threshold': float(40.0),
    'below_threshold': False,
    'needs_reduction': float(current_prediction - 40.0),
    'total_counterfactuals': len(recommendations),
    'recommendations': recommendations
}

In [29]:
if result['success']:
    print(f"\n‚úÖ Generated {result['total_counterfactuals']} recommendations")
    print(f"\nüìã Top Recommendations:")
    
    for i, rec in enumerate(result['recommendations'][:3], 1):
        print(f"\n--- Recommendation {i} ---")
        print(f"Predicted consumption: {rec['predicted_consumption']:.2f} kWh")
        print(f"Reduction: {rec['reduction']:.2f} kWh ({rec['reduction_pct']:.1f}%)")
        print(f"Below threshold: {'‚úÖ Yes' if rec['below_threshold'] else '‚ùå No'}")
        
        if rec['changes']:
            print(f"\nKey changes needed:")
            for change in rec['changes'][:5]:  # Top 5 changes
                print(f"  ‚Ä¢ {change['action']}")
                print(f"    ({change['description']})")
else:
    print(f"\n‚ùå Error: {result.get('error', 'Unknown error')}")
    if 'error_details' in result:
        print(f"\nError details:\n{result['error_details']}")


‚úÖ Generated 5 recommendations

üìã Top Recommendations:

--- Recommendation 1 ---
Predicted consumption: 1.37 kWh
Reduction: 86.50 kWh (98.4%)
Below threshold: ‚úÖ Yes

--- Recommendation 2 ---
Predicted consumption: 2.04 kWh
Reduction: 85.83 kWh (97.7%)
Below threshold: ‚úÖ Yes

--- Recommendation 3 ---
Predicted consumption: 1.35 kWh
Reduction: 86.52 kWh (98.5%)
Below threshold: ‚úÖ Yes


# ============================================================================
# PREDICT FUTURE WITH CURRENT CONSUMPTION + DICE MONITORING
# ============================================================================
# 
# Use case: 
# 1. Input: electricity_consumption t·∫°i th·ªùi ƒëi·ªÉm hi·ªán t·∫°i (21:00) = 50.0 kWh
# 2. D·ª± ƒëo√°n future 24 gi·ªù: d√πng prediction t·∫°i 22:00 ƒë·ªÉ predict 23:00, ...
# 3. DICE s·∫Ω ki·ªÉm tra threshold v√† ƒë∆∞a ra recommendations khi v∆∞·ª£t ng∆∞·ª°ng

In [None]:
# Example: Predict future with current consumption and monitor threshold
print("\n" + "=" * 80)
print("EXAMPLE: Predict Future with Current Consumption + DICE Monitoring")
print("=" * 80)

# Input data t·∫°i th·ªùi ƒëi·ªÉm 21:00
json_data = {
    'time': '2016-01-01T21:00:00',
    'building_id': 'Bear_education_Sharon',
    'site_id': 'Bear',
    'primaryspaceusage': 'Education',
    'sub_primaryspaceusage': 'Education',
    'sqm': 5261.7,
    'yearbuilt': 1953,
    'numberoffloors': 5,
    'occupants': 200,
    'timezone': 'US/Pacific',
    'airTemperature': 25.0,
    'cloudCoverage': 30.0,
    'dewTemperature': 18.0,
    'windSpeed': 2.6,
    'seaLvlPressure': 1020.7,
    'precipDepth1HR': 0.0
}

# ƒêi·ªán ti√™u th·ª• hi·ªán t·∫°i t·∫°i 21:00
current_consumption = 50.0  # kWh

# D·ª± ƒëo√°n 24 gi·ªù t·ªõi v·ªõi threshold = 50.0 kWh
result = explainer.predict_future_with_monitoring(
    json_data=json_data,
    current_electricity_consumption=current_consumption,
    hours=24,
    threshold=50.0
)

print(f"\n‚úÖ Predictions completed!")
print(f"   - Total hours predicted: {result['summary']['total_hours']}")
print(f"   - Hours above threshold: {result['summary']['hours_above_threshold']}")
print(f"   - Max consumption: {result['summary']['max_consumption']:.2f} kWh")
print(f"   - Min consumption: {result['summary']['min_consumption']:.2f} kWh")
print(f"   - Mean consumption: {result['summary']['mean_consumption']:.2f} kWh")

In [None]:
# Hi·ªÉn th·ªã predictions
print("\n" + "=" * 80)
print("PREDICTIONS FOR NEXT 24 HOURS")
print("=" * 80)
print(result['predictions'].head(10))

In [None]:
# Hi·ªÉn th·ªã alerts (c√°c th·ªùi ƒëi·ªÉm v∆∞·ª£t threshold)
if result['alerts']:
    print("\n" + "=" * 80)
    print(f"‚ö†Ô∏è  ALERTS: {len(result['alerts'])} time points exceeded threshold")
    print("=" * 80)
    
    for i, alert in enumerate(result['alerts'][:5], 1):  # Show first 5 alerts
        print(f"\n--- Alert {i} ---")
        print(f"Timestamp: {alert['timestamp']}")
        print(f"Hour: {alert['hour']}")
        print(f"Predicted consumption: {alert['predicted_consumption']:.2f} kWh")
        print(f"Threshold: {alert['threshold']:.2f} kWh")
        print(f"Exceeded by: {alert['exceeded_by']:.2f} kWh ({alert['exceeded_by_pct']:.1f}%)")
        
        if alert['recommendations']:
            print(f"\nüìã Top Recommendations:")
            for j, rec in enumerate(alert['recommendations'][:2], 1):  # Show top 2 recommendations
                print(f"  {j}. Predicted: {rec['predicted_consumption']:.2f} kWh")
                print(f"     Reduction: {rec['reduction']:.2f} kWh ({rec['reduction_pct']:.1f}%)")
                print(f"     Below threshold: {'‚úÖ Yes' if rec['below_threshold'] else '‚ùå No'}")
                if rec.get('changes'):
                    print(f"     Key changes:")
                    for change in rec['changes'][:2]:  # Show top 2 changes
                        print(f"       ‚Ä¢ {change['action']}")
else:
    print("\n‚úÖ No alerts - all predictions are below threshold!")

In [None]:
# Visualize predictions with threshold line
import matplotlib.pyplot as plt
import pandas as pd

if 'result' in locals() and result.get('success'):
    fig, ax = plt.subplots(figsize=(14, 6))
    
    predictions_df = result['predictions']
    ax.plot(predictions_df['timestamp'], predictions_df['predicted_consumption'], 
            marker='o', linewidth=2, markersize=4, label='Predicted Consumption')
    
    # Draw threshold line
    threshold = result['threshold']
    ax.axhline(y=threshold, color='r', linestyle='--', linewidth=2, 
               label=f'Threshold ({threshold} kWh)')
    
    # Highlight alerts
    if result['alerts']:
        alert_timestamps = [alert['timestamp'] for alert in result['alerts']]
        alert_consumptions = [alert['predicted_consumption'] for alert in result['alerts']]
        ax.scatter(alert_timestamps, alert_consumptions, 
                  color='red', s=100, zorder=5, label='Threshold Exceeded', marker='x')
    
    ax.set_xlabel('Timestamp', fontsize=12)
    ax.set_ylabel('Electricity Consumption (kWh)', fontsize=12)
    ax.set_title('Future Predictions with Threshold Monitoring', fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()