In [4]:
!python --version

Python 3.11.7


In [None]:
# System operations
import inspect
import os
import sys
import uuid
from pathlib import Path

# Date and time
from datetime import datetime

# Type hinting
from typing import Any, List, Mapping, Tuple, Union

# Data manipulation
import math
import numpy as np
import pandas as pd
import random
import re
import requests
import simplejson as json
from bs4 import BeautifulSoup

# Data visualization
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

# User interaction
from IPython.display import clear_output
from ipywidgets import Button, FloatSlider, HBox, HTML, IntProgress, Text, VBox

# CityLearn
from citylearn.citylearn import CityLearnEnv
from citylearn.data import DataSet
from citylearn.reward_function import RewardFunction, SolarPenaltyReward, ComfortReward
from citylearn.wrappers import NormalizedObservationWrapper, StableBaselines3Wrapper, TabularQLearningWrapper
from citylearn.agents.rbc import BasicRBC
from citylearn.agents.q_learning import TabularQLearning

# Baseline RL algorithms
from stable_baselines3 import DQN, SAC
from stable_baselines3.common.callbacks import EvalCallback, CallbackList, BaseCallback
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecMonitor


Couldn't import dot_parser, loading of dot files will not be possible.


2025-08-05 09:06:58.750046: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-05 09:06:58.763389: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754377618.777472 2388095 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754377618.781529 2388095 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1754377618.792963 2388095 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [6]:
DATASET_NAME = 'citylearn_challenge_2023_phase_3_1'
schema = DataSet().get_schema(DATASET_NAME)
print(schema['root_directory'])

# Building
#root_directory = schema['root_directory']
root_directory = 'Bachelorthesis_DQN_Agent/data/datasets/citylearn_challenge_2023_phase_3_1'
building_name = 'Building_1'
# Weather data
filename = schema['buildings'][building_name]['weather']
filepath = os.path.join(root_directory, filename)
weather_data = pd.read_csv(filepath)
# Pricing data (simple)
filename = schema['buildings'][building_name]['pricing']
filepath = os.path.join(root_directory, filename)
pricing_data = pd.read_csv(filepath)
# building data
filename = schema['buildings'][building_name]['energy_simulation']
filepath = os.path.join(root_directory, filename)
building_data = pd.read_csv(filepath)


INFO:root:Go here /home/iai/cj9272/.cache/citylearn/v2.4.1/datasets/citylearn_challenge_2023_phase_3_1/schema.json 


/home/iai/cj9272/.cache/citylearn/v2.4.1/datasets/citylearn_challenge_2023_phase_3_1


In [7]:
def set_active_observations( # from tutorial
    schema: dict, active_observations: List[str]
) -> dict:
    """Set the observations that will be part of the environment's
    observation space that is provided to the control agent.

    Parameters
    ----------
    schema: dict
        CityLearn dataset mapping used to construct environment.
    active_observations: List[str]
        Names of observations to set active to be passed to control agent.

    Returns
    -------
    schema: dict
        CityLearn dataset mapping with active observations set.
    """

    active_count = 0

    for o in schema['observations']:
        if o in active_observations:
            schema['observations'][o]['active'] = True
            active_count += 1
        else:
            schema['observations'][o]['active'] = False

    valid_observations = list(schema['observations'].keys())
    assert active_count == len(active_observations),\
        'the provided observations are not all valid observations.'\
          f' Valid observations in CityLearn are: {valid_observations}'

    return schema

In [8]:
def plot_building_guide(env):
    """Plots building load and generation profiles for a single building."""

    b = env.buildings[0]
    y1 = b.energy_simulation.non_shiftable_load
    y2 = b.pv.get_generation(b.energy_simulation.solar_generation)
    x = range(len(y1))

    fig, ax = plt.subplots(figsize=(16, 4))
    ax.plot(x, y1, label='Non Shiftable Load')
    ax.plot(x, y2, label='Solar Generation')
    ax.set_title(b.name)
    ax.set_xlabel('Time step')
    ax.set_ylabel('kWh')
    ax.legend(loc='upper left', framealpha=0.0)
    return fig


# Optimize a Soft-Actor Critic Reinforcement Learning Controller
---

To control an environment like CityLearn that has continuous states and actions, tabular Q-learning is not practical, as it suffers from the _curse of dimensionality_. Actor-critic reinforcement learning (RL) methods use artificial neural networks to generalize across the state-action space. The actor network maps the current states to the actions that it estimates to be optimal. Then, the critic network evaluates those actions by mapping them, together with the states under which they were taken, to the Q-values.

<figure class="image">
  <img src="https://github.com/intelligent-environments-lab/CityLearn/blob/master/assets/images/sac_schematic.png?raw=true"  width="350" alt="SAC networks overview.">
  <figcaption>Figure: SAC networks overview (adopted from <a href="https://doi.org/10.1145/3408308.3427604">Vazquez-Canteli et al., 2020</a>).</figcaption>
</figure>

Soft actor-critic (SAC) is a model-free off-policy RL algorithm. As an off-policy method, SAC can reuse experience and learn from fewer samples. SAC is based on three key elements: an actor-critic architecture, off-policy updates, and entropy maximization for efficient exploration and stable training. SAC learns three different functions: the actor (policy), the critic (soft Q-function), and the value function.

This tutorial does not dive into the theory and algorithm of SAC but for interested participants please, refer to [Soft Actor-Critic Algorithms and Applications](https://doi.org/10.48550/arXiv.1812.05905).

We will now initialize a new environment and plug it to an SAC agent to help us solve our control problem. Luckily, we do not have to write our own implementation of the SAC algorithm. Instead, we can make use of Python libraries that have standardized the implementation of a number of RL algorithms. One of such libraries that we will use is [Stable Baselines3](https://stable-baselines3.readthedocs.io/en/master/index.html). At the time of writing, there are [13 different RL algorithms](https://stable-baselines3.readthedocs.io/en/master/guide/algos.html#rl-algorithms) implemented between Stable Baselines3 and Stable-Baselines3 - Contrib (contrib package for Stable-Baselines3 - experimental reinforcement learning code), including SAC.

<!-- The new environment is initialized below: -->

Before our environment is ready for use in Stable Baselines3, we need to take a couple of preprocessing steps in the form of wrappers. Firstly, we will wrap the environment using the `NormalizedObservationWrapper` (see [docs](https://www.citylearn.net/api/citylearn.wrappers.html#citylearn.wrappers.NormalizedObservationWrapper)) that ensure all observations that are served to the agent are [min-max normalized](https://www.codecademy.com/article/normalization) between [0, 1] and cyclical observations e.g. hour, are encoded using the [sine and cosine transformation](https://www.avanwyk.com/encoding-cyclical-features-for-deep-learning/).

Next, we wrap with the `StableBaselines3Wrapper` (see [docs](https://www.citylearn.net/api/citylearn.wrappers.html#citylearn.wrappers.StableBaselines3Wrapper)) that ensures observations, actions and rewards are served in manner that is compatible with Stable Baselines3 interface:

In [None]:
class CustomCallback(BaseCallback):
    def __init__(self, env):
        super().__init__(verbose=0)
        self.env = env
        self.citylearn_env = env.unwrapped
        self.reward_history = [0]
        self.episode_count = 0
        self.last_episode_printed = -1  # Track which episode was last printed

    def _on_step(self) -> bool:
        if self.citylearn_env.time_step == 0:
            self.reward_history.append(0)
            self.episode_count += 1
            
            # Only print if we haven't printed this episode yet
            if self.episode_count != self.last_episode_printed:
                if len(self.reward_history) > 1:  # Make sure we have a previous reward to show
                    prev_reward = self.reward_history[-2]
                    print(f"Episode {self.episode_count}/5 completed. Reward: {prev_reward:.2f}")
                    self.last_episode_printed = self.episode_count
        else:
            # Accumulate rewards during the episode
            self.reward_history[-1] += sum(self.citylearn_env.rewards[-1])

        return True

In [None]:
ACTIVE_OBSERVATIONS = ['hour']

schema = set_active_observations(schema, ACTIVE_OBSERVATIONS)

sac_env = CityLearnEnv(
    schema, 
    central_agent=True, 
    buildings=[0],
)
sac_env.reward_function = SolarPenaltyReward(sac_env.get_metadata())
sac_env = NormalizedObservationWrapper(sac_env)
sac_env = StableBaselines3Wrapper(sac_env)


sac_model = SAC(policy='MlpPolicy', env=sac_env, seed=42)
print(f"\nActive observations: {[k for k, v in schema['observations'].items() if v['active']]}")

/home/iai/cj9272/.cache/citylearn/v2.4.1/datasets/citylearn_challenge_2023_phase_3_1
Dataset '/home/iai/cj9272/.cache/citylearn/v2.4.1/datasets/citylearn_challenge_2023_phase_3_1' copied to '/home/iai/cj9272/.cache/citylearn/v2.4.1/datasets/citylearn_challenge_2023_phase_3_1/../../../../results/2025-08-05_09-37-20'

Active observations: ['hour']


In [11]:
# ----------------- CALCULATE NUMBER OF TRAINING EPISODES -----------------
fraction = 0.25
#sac_episodes = int(tql_episodes*fraction)
sac_episodes = 5
print('Fraction of Tabular Q-Learning episodes used:', fraction)
print('Number of episodes to train:', sac_episodes)
sac_episode_timesteps = schema['simulation_end_time_step'] - schema['simulation_start_time_step'] + 1
sac_total_timesteps = sac_episodes * sac_episode_timesteps

# ------------------------------- TRAIN MODEL -----------------------------
sac_callback = CustomCallback(env=sac_env)
sac_model = sac_model.learn(
    total_timesteps=sac_total_timesteps,
    callback=sac_callback
)

Fraction of Tabular Q-Learning episodes used: 0.25
Number of episodes to train: 5
Episode 1/10 completed. Reward: 0.00
Episode 1/10 completed. Reward: 0.00
Episode 2/10 completed. Reward: 0.00
Episode 2/10 completed. Reward: 0.00
Episode 3/10 completed. Reward: 0.00
Episode 3/10 completed. Reward: 0.00
Episode 4/10 completed. Reward: 0.00
Episode 4/10 completed. Reward: 0.00
Episode 5/10 completed. Reward: 0.00
Episode 5/10 completed. Reward: 0.00


In [12]:
# PROGRESS CHECK (run this in a separate cell if training is still running)
# This won't interrupt training but shows current status

# Check if callback exists and has data
if 'sac_callback' in locals() and hasattr(sac_callback, 'episode_count'):
    print(f"Current episode: {sac_callback.episode_count}/10")
    print(f"Episodes completed: {len(sac_callback.reward_history)-1}")
    if len(sac_callback.reward_history) > 1:
        recent_rewards = sac_callback.reward_history[-3:]  # Show last 3 episodes
        print(f"Recent episode rewards: {recent_rewards}")
        print(f"Average recent reward: {np.mean(recent_rewards):.2f}")
else:
    print("Training callback not available or training not started yet")

Current episode: 5/10
Episodes completed: 5
Recent episode rewards: [np.float32(0.0), np.float32(0.0), np.float32(0.0)]
Average recent reward: 0.00


In [24]:
# ========================= AGENT COMPARISON TOOLS (FIXED) =========================

def evaluate_agent(agent, env, episodes=5, agent_type="Unknown"):
    """Evaluate an agent and collect metrics."""
    
    # Reset environment for evaluation
    env_copy = env.unwrapped if hasattr(env, 'unwrapped') else env
    
    episode_rewards = []
    episode_costs = []
    episode_consumption = []
    episode_solar_used = []
    
    for episode in range(episodes):
        # Fix: Handle the SB3 environment API correctly
        reset_result = env.reset()
        if isinstance(reset_result, tuple):
            obs, info = reset_result  # SB3 API returns (obs, info)
        else:
            obs = reset_result  # Older API returns just obs
            
        episode_reward = 0
        episode_cost = 0
        episode_consumption_total = 0
        episode_solar_total = 0
        
        done = False
        step = 0
        
        while not done and step < 8760:  # Limit steps to prevent infinite loops
            if agent_type == "RBC":
                # For RBC agent - it expects different observation format
                if hasattr(agent, 'env') and hasattr(agent.env, 'observations'):
                    rbc_obs = agent.env.observations
                    action = agent.predict(rbc_obs)
                else:
                    if isinstance(obs, np.ndarray) and obs.ndim == 1:
                        action = agent.predict([obs])  # Wrap in list for RBC
                    else:
                        action = agent.predict(obs)
            else:
                # For SAC agent (Stable Baselines3)
                action, _ = agent.predict(obs, deterministic=True)
            
            # Fix: Handle step result correctly too
            step_result = env.step(action)
            if len(step_result) == 4:
                obs, reward, done, info = step_result
            else:
                obs, reward, terminated, truncated, info = step_result
                done = terminated or truncated
            
            # Fix: Handle different reward types
            if isinstance(reward, (list, tuple, np.ndarray)):
                episode_reward += sum(reward)
            else:
                episode_reward += float(reward)  # Handle numpy.float32 and other numeric types
            
            # ========================= FIXED COST AND CONSUMPTION METRICS =========================
            if hasattr(env_copy, 'buildings') and len(env_copy.buildings) > 0:
                building = env_copy.buildings[0]
                
                # Get current consumption (VERIFIED: this works)
                if hasattr(building, 'net_electricity_consumption') and len(building.net_electricity_consumption) > 0:
                    current_consumption = building.net_electricity_consumption[-1]
                    episode_consumption_total += abs(current_consumption)
                    
                    # Calculate cost using pricing data (VERIFIED: this works)
                    if hasattr(building, 'pricing') and hasattr(building.pricing, 'electricity_pricing'):
                        if len(building.pricing.electricity_pricing) > 0:
                            current_price = building.pricing.electricity_pricing[-1]
                            episode_cost += abs(current_consumption * current_price)  # Use abs to ensure positive cost
                
                # FIXED: Solar generation using correct method
                solar_gen = 0
                if hasattr(building, 'energy_simulation') and hasattr(building.energy_simulation, 'solar_generation'):
                    solar_generation_data = building.energy_simulation.solar_generation
                    if hasattr(solar_generation_data, '__len__') and len(solar_generation_data) > 0:
                        # Get current solar generation from energy simulation data
                        current_time_step = building.time_step if hasattr(building, 'time_step') else step
                        if current_time_step < len(solar_generation_data):
                            solar_gen = solar_generation_data[current_time_step]
                        else:
                            solar_gen = solar_generation_data[-1]  # Use last available value
                
                episode_solar_total += solar_gen
            
            step += 1
        
        episode_rewards.append(episode_reward)
        episode_costs.append(episode_cost)
        episode_consumption.append(episode_consumption_total)
        episode_solar_used.append(episode_solar_total)
        
        print(f"{agent_type} Episode {episode+1}/{episodes} - Reward: {episode_reward:.2f}, Cost: ${episode_cost:.2f}, Consumption: {episode_consumption_total:.2f}")
    
    return {
        'agent_type': agent_type,
        'episode_rewards': episode_rewards,
        'episode_costs': episode_costs,
        'episode_consumption': episode_consumption,
        'episode_solar_used': episode_solar_used,
        'avg_reward': np.mean(episode_rewards),
        'avg_cost': np.mean(episode_costs),
        'avg_consumption': np.mean(episode_consumption),
        'avg_solar_used': np.mean(episode_solar_used)
    }

In [14]:
def plot_agent_comparison(sac_results, rbc_results):
    """Plot comprehensive comparison between SAC and RBC agents."""
    
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    fig.suptitle('SAC vs RBC Agent Comparison', fontsize=16, fontweight='bold')
    
    # 1. Episode Rewards Comparison
    axes[0, 0].plot(sac_results['episode_rewards'], 'b-o', label='SAC', linewidth=2, markersize=6)
    axes[0, 0].plot(rbc_results['episode_rewards'], 'r-s', label='RBC', linewidth=2, markersize=6)
    axes[0, 0].set_title('Episode Rewards Comparison')
    axes[0, 0].set_xlabel('Episode')
    axes[0, 0].set_ylabel('Total Reward')
    axes[0, 0].legend()
    axes[0, 0].grid(True, alpha=0.3)
    
    # 2. Episode Costs Comparison
    axes[0, 1].plot(sac_results['episode_costs'], 'b-o', label='SAC', linewidth=2, markersize=6)
    axes[0, 1].plot(rbc_results['episode_costs'], 'r-s', label='RBC', linewidth=2, markersize=6)
    axes[0, 1].set_title('Episode Costs Comparison')
    axes[0, 1].set_xlabel('Episode')
    axes[0, 1].set_ylabel('Total Cost ($)')
    axes[0, 1].legend()
    axes[0, 1].grid(True, alpha=0.3)
    
    # 3. Average Performance Bar Chart
    metrics = ['Avg Reward', 'Avg Cost', 'Avg Consumption', 'Avg Solar Used']
    sac_values = [sac_results['avg_reward'], sac_results['avg_cost'], 
                  sac_results['avg_consumption'], sac_results['avg_solar_used']]
    rbc_values = [rbc_results['avg_reward'], rbc_results['avg_cost'], 
                  rbc_results['avg_consumption'], rbc_results['avg_solar_used']]
    
    x = np.arange(len(metrics))
    width = 0.35
    
    axes[1, 0].bar(x - width/2, sac_values, width, label='SAC', color='blue', alpha=0.7)
    axes[1, 0].bar(x + width/2, rbc_values, width, label='RBC', color='red', alpha=0.7)
    axes[1, 0].set_title('Average Performance Metrics')
    axes[1, 0].set_ylabel('Value')
    axes[1, 0].set_xticks(x)
    axes[1, 0].set_xticklabels(metrics, rotation=45, ha='right')
    axes[1, 0].legend()
    axes[1, 0].grid(True, alpha=0.3)
    
    # 4. Performance Summary Table
    axes[1, 1].axis('tight')
    axes[1, 1].axis('off')
    
    table_data = [
        ['Metric', 'SAC', 'RBC', 'SAC vs RBC'],
        ['Avg Reward', f'{sac_results["avg_reward"]:.2f}', f'{rbc_results["avg_reward"]:.2f}', 
         f'{((sac_results["avg_reward"] - rbc_results["avg_reward"]) / abs(rbc_results["avg_reward"]) * 100):+.1f}%'],
        ['Avg Cost ($)', f'{sac_results["avg_cost"]:.2f}', f'{rbc_results["avg_cost"]:.2f}', 
         f'{((sac_results["avg_cost"] - rbc_results["avg_cost"]) / abs(rbc_results["avg_cost"]) * 100):+.1f}%'],
        ['Avg Consumption', f'{sac_results["avg_consumption"]:.2f}', f'{rbc_results["avg_consumption"]:.2f}', 
         f'{((sac_results["avg_consumption"] - rbc_results["avg_consumption"]) / abs(rbc_results["avg_consumption"]) * 100):+.1f}%'],
        ['Avg Solar Used', f'{sac_results["avg_solar_used"]:.2f}', f'{rbc_results["avg_solar_used"]:.2f}', 
         f'{((sac_results["avg_solar_used"] - rbc_results["avg_solar_used"]) / abs(rbc_results["avg_solar_used"]) * 100):+.1f}%']
    ]
    
    table = axes[1, 1].table(cellText=table_data[1:], colLabels=table_data[0], 
                            cellLoc='center', loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.5)
    axes[1, 1].set_title('Performance Summary')
    
    plt.tight_layout()
    plt.show()
    
    return fig

In [21]:
# ========================= DEBUG BUILDING ATTRIBUTES =========================

print("="*50)
print("DEBUGGING BUILDING OBJECT ATTRIBUTES")
print("="*50)

# Check if SAC model is available
if 'sac_model' in locals() and 'sac_env' in locals():
    print("✓ SAC model found - inspecting building...")
    
    # Reset environment to get building object
    env_copy = sac_env.unwrapped if hasattr(sac_env, 'unwrapped') else sac_env
    reset_result = sac_env.reset()
    
    if hasattr(env_copy, 'buildings') and len(env_copy.buildings) > 0:
        building = env_copy.buildings[0]
        print(f"\n🏢 Building Type: {type(building).__name__}")
        print(f"🏢 Building Name: {getattr(building, 'name', 'Unknown')}")
        
        # Check key attributes for cost/consumption calculation
        print(f"\n🔍 Key Attributes Check:")
        
        # 1. Net electricity consumption
        if hasattr(building, 'net_electricity_consumption'):
            net_elec = getattr(building, 'net_electricity_consumption')
            print(f"✅ net_electricity_consumption: {type(net_elec)} (length: {len(net_elec) if hasattr(net_elec, '__len__') else 'N/A'})")
            if hasattr(net_elec, '__len__') and len(net_elec) > 0:
                print(f"   Current value: {net_elec[-1]}")
        else:
            print(f"❌ net_electricity_consumption: NOT FOUND")
            
        # 2. Pricing information
        if hasattr(building, 'pricing'):
            pricing = getattr(building, 'pricing')
            print(f"✅ pricing: {type(pricing).__name__}")
            
            # Check electricity pricing
            if hasattr(pricing, 'electricity_pricing'):
                elec_pricing = getattr(pricing, 'electricity_pricing')
                print(f"   ✅ electricity_pricing: {type(elec_pricing)} (length: {len(elec_pricing) if hasattr(elec_pricing, '__len__') else 'N/A'})")
                if hasattr(elec_pricing, '__len__') and len(elec_pricing) > 0:
                    print(f"      Current price: {elec_pricing[-1]}")
            else:
                print(f"   ❌ electricity_pricing: NOT FOUND")
                # Show available pricing attributes
                pricing_attrs = [attr for attr in dir(pricing) if not attr.startswith('_') and not callable(getattr(pricing, attr))]
                print(f"   Available pricing attributes: {pricing_attrs[:10]}")  # Show first 10
        else:
            print(f"❌ pricing: NOT FOUND")
            
        # 3. Solar/PV information
        if hasattr(building, 'pv'):
            pv = getattr(building, 'pv')
            print(f"✅ pv: {type(pv).__name__ if pv is not None else 'None'}")
            
            if pv is not None:
                # Check electricity generation
                if hasattr(pv, 'electricity_generation'):
                    elec_gen = getattr(pv, 'electricity_generation')
                    print(f"   ✅ electricity_generation: {type(elec_gen)} (length: {len(elec_gen) if hasattr(elec_gen, '__len__') else 'N/A'})")
                    if hasattr(elec_gen, '__len__') and len(elec_gen) > 0:
                        print(f"      Current generation: {elec_gen[-1]}")
                else:
                    print(f"   ❌ electricity_generation: NOT FOUND")
                    pv_attrs = [attr for attr in dir(pv) if not attr.startswith('_') and not callable(getattr(pv, attr))]
                    print(f"   Available PV attributes: {pv_attrs[:10]}")  # Show first 10
            else:
                print(f"   ⚠️  PV object is None")
        else:
            print(f"❌ pv: NOT FOUND")
            
        # 4. Energy simulation data
        if hasattr(building, 'energy_simulation'):
            energy_sim = getattr(building, 'energy_simulation')
            print(f"✅ energy_simulation: {type(energy_sim).__name__}")
            
            if hasattr(energy_sim, 'solar_generation'):
                solar_gen = getattr(energy_sim, 'solar_generation')
                print(f"   ✅ solar_generation: {type(solar_gen)} (length: {len(solar_gen) if hasattr(solar_gen, '__len__') else 'N/A'})")
                if hasattr(solar_gen, '__len__') and len(solar_gen) > 0:
                    print(f"      Current solar: {solar_gen[-1] if hasattr(solar_gen, '__getitem__') else 'Cannot access'}")
            else:
                print(f"   ❌ solar_generation: NOT FOUND")
                energy_attrs = [attr for attr in dir(energy_sim) if not attr.startswith('_') and not callable(getattr(energy_sim, attr))]
                print(f"   Available energy_simulation attributes: {energy_attrs[:10]}")  # Show first 10
        else:
            print(f"❌ energy_simulation: NOT FOUND")
            
        # 5. Show all non-callable attributes
        print(f"\n📋 All Building Attributes (non-callable):")
        all_attrs = [attr for attr in dir(building) if not attr.startswith('_') and not callable(getattr(building, attr))]
        for i, attr in enumerate(all_attrs):
            if i % 5 == 0:
                print()
            print(f"{attr:<20}", end="")
        print()
        
    else:
        print("❌ No buildings found in environment")
        
else:
    print("❌ SAC model not found. Please run the training cell first.")

print("\n" + "="*50)

DEBUGGING BUILDING OBJECT ATTRIBUTES
✓ SAC model found - inspecting building...

🏢 Building Type: LSTMDynamicsBuilding
🏢 Building Name: Building_1

🔍 Key Attributes Check:
✅ net_electricity_consumption: <class 'numpy.ndarray'> (length: 1)
   Current value: 0.4156108498573303
✅ pricing: Pricing
   ✅ electricity_pricing: <class 'numpy.ndarray'> (length: 2208)
      Current price: 0.030249999836087227
✅ pv: PV
   ❌ electricity_generation: NOT FOUND
   Available PV attributes: ['DEFAULT_RANDOM_SEED_RANGE', 'DEFAULT_SECONDS_PER_TIME_STEP', 'autosize_config', 'available_nominal_power', 'efficiency', 'electricity_consumption', 'episode_tracker', 'nominal_power', 'numpy_random_state', 'random_seed']
✅ energy_simulation: EnergySimulation
   ✅ solar_generation: <class 'numpy.ndarray'> (length: 2208)
      Current solar: 0.0

📋 All Building Attributes (non-callable):

DEFAULT_RANDOM_SEED_RANGEDEFAULT_SECONDS_PER_TIME_STEPaction_metadata     action_space        active_actions      
active_observat

In [22]:
# ========================= MINIMAL BUILDING DEBUG =========================

if 'sac_env' in locals():
    env_copy = sac_env.unwrapped if hasattr(sac_env, 'unwrapped') else sac_env
    sac_env.reset()
    
    if hasattr(env_copy, 'buildings') and len(env_copy.buildings) > 0:
        building = env_copy.buildings[0]
        print("🔍 Essential Building Attributes:")
        
        # Check what we can actually access for metrics
        attrs_to_check = [
            'net_electricity_consumption',
            'pricing',
            'pv', 
            'energy_simulation',
            'electrical_storage',
            'cooling_device',
            'heating_device'
        ]
        
        for attr in attrs_to_check:
            if hasattr(building, attr):
                obj = getattr(building, attr)
                if obj is not None:
                    print(f"✅ {attr}: {type(obj).__name__}")
                    
                    # Special handling for pricing
                    if attr == 'pricing' and hasattr(obj, 'electricity_pricing'):
                        elec_pricing = getattr(obj, 'electricity_pricing')
                        print(f"   └─ electricity_pricing: length={len(elec_pricing) if hasattr(elec_pricing, '__len__') else 'N/A'}")
                        
                    # Special handling for pv
                    elif attr == 'pv' and hasattr(obj, 'electricity_generation'):
                        elec_gen = getattr(obj, 'electricity_generation')
                        print(f"   └─ electricity_generation: length={len(elec_gen) if hasattr(elec_gen, '__len__') else 'N/A'}")
                        
                    # Special handling for energy_simulation
                    elif attr == 'energy_simulation':
                        if hasattr(obj, 'solar_generation'):
                            solar_gen = getattr(obj, 'solar_generation')
                            print(f"   └─ solar_generation: length={len(solar_gen) if hasattr(solar_gen, '__len__') else 'N/A'}")
                        if hasattr(obj, 'non_shiftable_load'):
                            load = getattr(obj, 'non_shiftable_load')
                            print(f"   └─ non_shiftable_load: length={len(load) if hasattr(load, '__len__') else 'N/A'}")
                            
                    # For lists, show length
                    elif hasattr(obj, '__len__'):
                        print(f"   └─ length: {len(obj)}")
                else:
                    print(f"⚠️  {attr}: None")
            else:
                print(f"❌ {attr}: NOT FOUND")
                
        print(f"\n💡 Try accessing net_electricity_consumption...")
        if hasattr(building, 'net_electricity_consumption'):
            net_elec = building.net_electricity_consumption
            if hasattr(net_elec, '__len__') and len(net_elec) > 0:
                print(f"   Current consumption: {net_elec[-1]}")
            else:
                print(f"   Empty or invalid: {type(net_elec)}")
    else:
        print("❌ No buildings found")
else:
    print("❌ Environment not found")

🔍 Essential Building Attributes:
✅ net_electricity_consumption: ndarray
   └─ length: 1
✅ pricing: Pricing
   └─ electricity_pricing: length=2208
✅ pv: PV
✅ energy_simulation: EnergySimulation
   └─ solar_generation: length=2208
   └─ non_shiftable_load: length=2208
✅ electrical_storage: Battery
✅ cooling_device: HeatPump
✅ heating_device: HeatPump

💡 Try accessing net_electricity_consumption...
   Current consumption: 0.4156108498573303


In [23]:
# ========================= CHECK PV ELECTRICITY GENERATION =========================

if 'sac_env' in locals():
    env_copy = sac_env.unwrapped if hasattr(sac_env, 'unwrapped') else sac_env
    building = env_copy.buildings[0]
    
    print("🔍 Detailed PV Investigation:")
    if hasattr(building, 'pv') and building.pv is not None:
        pv = building.pv
        print(f"✅ PV object type: {type(pv).__name__}")
        
        # Check all PV attributes
        pv_attrs = [attr for attr in dir(pv) if not attr.startswith('_')]
        print(f"📋 PV attributes: {pv_attrs}")
        
        # Check specifically for electricity_generation
        if hasattr(pv, 'electricity_generation'):
            elec_gen = pv.electricity_generation
            print(f"✅ electricity_generation: {type(elec_gen)} (length: {len(elec_gen) if hasattr(elec_gen, '__len__') else 'N/A'})")
            if hasattr(elec_gen, '__len__') and len(elec_gen) > 0:
                print(f"   First few values: {elec_gen[:5] if len(elec_gen) >= 5 else elec_gen}")
                print(f"   Current value: {elec_gen[-1]}")
        else:
            print(f"❌ electricity_generation: NOT FOUND")
            
        # Check for other generation-related attributes
        gen_attrs = [attr for attr in pv_attrs if 'gen' in attr.lower()]
        if gen_attrs:
            print(f"🔍 Generation-related attributes: {gen_attrs}")
            
        # Try other common PV attributes
        common_attrs = ['generation', 'power', 'output', 'production']
        for attr in common_attrs:
            if hasattr(pv, attr):
                val = getattr(pv, attr)
                print(f"✅ {attr}: {type(val)} (length: {len(val) if hasattr(val, '__len__') else 'N/A'})")
                
    print(f"\n🔍 Energy Simulation Solar Generation:")
    if hasattr(building, 'energy_simulation'):
        energy_sim = building.energy_simulation
        if hasattr(energy_sim, 'solar_generation'):
            solar_gen = energy_sim.solar_generation
            print(f"✅ solar_generation: {type(solar_gen)} (length: {len(solar_gen)})")
            print(f"   First few values: {solar_gen[:5]}")
            print(f"   Current value: {solar_gen[-1]}")
            
    print(f"\n🔍 Pricing Information:")
    if hasattr(building, 'pricing'):
        pricing = building.pricing
        if hasattr(pricing, 'electricity_pricing'):
            elec_pricing = pricing.electricity_pricing
            print(f"✅ electricity_pricing: {type(elec_pricing)} (length: {len(elec_pricing)})")
            print(f"   First few values: {elec_pricing[:5]}")
            print(f"   Current value: {elec_pricing[-1]}")
else:
    print("❌ Environment not available")

🔍 Detailed PV Investigation:
✅ PV object type: PV
📋 PV attributes: ['DEFAULT_RANDOM_SEED_RANGE', 'DEFAULT_SECONDS_PER_TIME_STEP', 'autosize', 'autosize_config', 'available_nominal_power', 'efficiency', 'electricity_consumption', 'episode_tracker', 'get_generation', 'get_metadata', 'next_time_step', 'nominal_power', 'numpy_random_state', 'random_seed', 'reset', 'reset_time_step', 'seconds_per_time_step', 'time_step', 'time_step_ratio', 'uid', 'update_electricity_consumption']
❌ electricity_generation: NOT FOUND
🔍 Generation-related attributes: ['get_generation']

🔍 Energy Simulation Solar Generation:
✅ solar_generation: <class 'numpy.ndarray'> (length: 2208)
   First few values: [0. 0. 0. 0. 0.]
   Current value: 0.0

🔍 Pricing Information:
✅ electricity_pricing: <class 'numpy.ndarray'> (length: 2208)
   First few values: [0.03025 0.03025 0.03025 0.03025 0.03025]
   Current value: 0.030249999836087227


In [25]:
# ========================= TEST FIXED EVALUATION =========================

print("="*50)
print("TESTING FIXED SAC AGENT EVALUATION")
print("="*50)

# Check if SAC model is available
if 'sac_model' in locals() and 'sac_env' in locals():
    print("✅ SAC model found - starting fixed evaluation...")
    
    # Evaluate the trained SAC agent with fixed metrics
    sac_results = evaluate_agent(sac_model, sac_env, episodes=2, agent_type="SAC")
    
    print(f"\n📊 FIXED SAC EVALUATION RESULTS:")
    print(f"Average Reward: {sac_results['avg_reward']:.3f}")
    print(f"Average Cost: ${sac_results['avg_cost']:.3f}")
    print(f"Average Consumption: {sac_results['avg_consumption']:.3f} kWh")
    print(f"Average Solar Used: {sac_results['avg_solar_used']:.3f} kWh")
    
    # Analysis
    if sac_results['avg_cost'] > 0:
        print(f"\n✅ Success: Non-zero cost calculated (${sac_results['avg_cost']:.3f})")
    if sac_results['avg_consumption'] > 0:
        print(f"✅ Success: Non-zero consumption calculated ({sac_results['avg_consumption']:.3f} kWh)")
    if sac_results['avg_solar_used'] > 0:
        print(f"✅ Success: Solar generation tracked ({sac_results['avg_solar_used']:.3f} kWh)")
    else:
        print(f"⚠️  Note: Solar generation is zero (nighttime or no solar)")
        
    if sac_results['avg_reward'] == 0:
        print(f"\n⚠️  Zero reward still achieved - this indicates:")
        print(f"   • SolarPenaltyReward function is very restrictive")
        print(f"   • Agent may need more training episodes")
        print(f"   • Consider trying different reward functions")
    
else:
    print("❌ SAC model not found. Please run the training cell first.")

TESTING FIXED SAC AGENT EVALUATION
✅ SAC model found - starting fixed evaluation...
SAC Episode 1/2 - Reward: 0.00, Cost: $0.00, Consumption: 0.00
SAC Episode 2/2 - Reward: 0.00, Cost: $0.00, Consumption: 0.00

📊 FIXED SAC EVALUATION RESULTS:
Average Reward: 0.000
Average Cost: $0.000
Average Consumption: 0.000 kWh
Average Solar Used: 401780.091 kWh
✅ Success: Solar generation tracked (401780.091 kWh)

⚠️  Zero reward still achieved - this indicates:
   • SolarPenaltyReward function is very restrictive
   • Agent may need more training episodes
   • Consider trying different reward functions
