In [None]:
pip install CityLearn

In [None]:
pip install stable_baselines3

In [3]:
import citylearn

In [4]:
citylearn.__version__

'2.3.0'

In [7]:
pip install numpy --upgrade



In [9]:
# System operations
import inspect
import os
import uuid

# Date and time
from datetime import datetime

# type hinting
from typing import List, Mapping, Tuple

# Data visualization
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
from tqdm import tqdm

# data manipulation
from bs4 import BeautifulSoup
import math
import numpy as np
import pandas as pd
import random
import re
import requests
import simplejson as json

# cityLearn
from citylearn.agents.base import (
   BaselineAgent,
   Agent as RandomAgent
)
from citylearn.agents.rbc import HourRBC
from citylearn.agents.q_learning import TabularQLearning
from citylearn.citylearn import CityLearnEnv
from citylearn.data import DataSet
from citylearn.reward_function import RewardFunction
from citylearn.wrappers import (
    NormalizedObservationWrapper,
    StableBaselines3Wrapper,
    TabularQLearningWrapper,
    NormalizedActionWrapper,
    StableBaselines3ObservationWrapper
)

# RL algorithms
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import BaseCallback

# User interaction
from IPython.display import clear_output
from ipywidgets import Button, FloatSlider, HBox, HTML
from ipywidgets import IntProgress, Text, VBox

ImportError: cannot import name '_center' from 'numpy._core.umath' (/usr/local/lib/python3.11/dist-packages/numpy/_core/umath.py)

In [None]:
print('All CityLearn datasets:', sorted(DataSet().get_dataset_names()))

NameError: name 'DataSet' is not defined

In [None]:
dataset_path = "citylearn_challenge_2023_phase_1"  # Path to the dataset directory

# Initialize the CityLearnEnv with the path
env = CityLearnEnv(dataset_path)

In [None]:
# Check number of buildings in the environment
print(f"Number of buildings: {len(env.buildings)}")

# Check the number of time steps or a sample of observations
for i, b in enumerate(env.buildings):
    print(f"Observations for Building {i}: {b.observations()}")

Number of buildings: 3
Observations for Building 0: {'day_type': 5, 'hour': 1, 'outdoor_dry_bulb_temperature': 24.66, 'outdoor_dry_bulb_temperature_predicted_1': 24.910639, 'outdoor_dry_bulb_temperature_predicted_2': 38.41596, 'outdoor_dry_bulb_temperature_predicted_3': 27.611464, 'diffuse_solar_irradiance': 0.0, 'diffuse_solar_irradiance_predicted_1': 54.625927, 'diffuse_solar_irradiance_predicted_2': 116.84289, 'diffuse_solar_irradiance_predicted_3': 0.0, 'direct_solar_irradiance': 0.0, 'direct_solar_irradiance_predicted_1': 143.32434, 'direct_solar_irradiance_predicted_2': 1020.7561, 'direct_solar_irradiance_predicted_3': 0.0, 'carbon_intensity': 0.40248835, 'indoor_dry_bulb_temperature': 23.098652, 'non_shiftable_load': 0.35683933, 'solar_generation': 0.0, 'dhw_storage_soc': 0.0, 'electrical_storage_soc': 0.2, 'net_electricity_consumption': 0.67788136, 'electricity_pricing': 0.02893, 'electricity_pricing_predicted_1': 0.02893, 'electricity_pricing_predicted_2': 0.02915, 'electricit

In [None]:
data = []
for i, b in enumerate(env.buildings):
    for t in range(env.time_steps):  # Iterate over the time steps (adjust this depending on the time steps available)
        obs = b.observations(t)  # Assuming you can specify a time step
        data.append({"Building": i, "Time Step": t, "Observations": obs})

# Convert to DataFrame
df = pd.DataFrame(data)
print(f"Total rows: {len(df)}")  # Check if you have more than 3 rows now
print(df.head())


Total rows: 2160
   Building  Time Step                                       Observations
0         0          0  {'day_type': 5, 'hour': 1, 'outdoor_dry_bulb_t...
1         0          1  {'month': 6, 'hour': 1, 'day_type': 5, 'indoor...
2         0          2  {'month': 6, 'hour': 1, 'day_type': 5, 'indoor...
3         0          3  {'month': 6, 'hour': 1, 'day_type': 5, 'indoor...
4         0          4  {'month': 6, 'hour': 1, 'day_type': 5, 'indoor...


In [None]:
# Flatten the 'Observations' column to individual columns
observations_df = pd.json_normalize(df['Observations'])

# Now, combine the flattened observations with the original 'Building' and 'Time Step' columns
final_df = pd.concat([df[['Building', 'Time Step']], observations_df], axis=1)

# Check the resulting DataFrame
print(f"Total rows: {len(final_df)}")
print(final_df.head())

Total rows: 2160
   Building  Time Step  day_type  hour  outdoor_dry_bulb_temperature  \
0         0          0         5     1                         24.66   
1         0          1         5     1                         24.66   
2         0          2         5     1                         24.66   
3         0          3         5     1                         24.66   
4         0          4         5     1                         24.66   

   outdoor_dry_bulb_temperature_predicted_1  \
0                                 24.910639   
1                                 24.910639   
2                                 24.910639   
3                                 24.910639   
4                                 24.910639   

   outdoor_dry_bulb_temperature_predicted_2  \
0                                 38.415958   
1                                 38.415958   
2                                 38.415958   
3                                 38.415958   
4                               

In [None]:
# column names
print(final_df.columns)

Index(['Building', 'Time Step', 'day_type', 'hour',
       'outdoor_dry_bulb_temperature',
       'outdoor_dry_bulb_temperature_predicted_1',
       'outdoor_dry_bulb_temperature_predicted_2',
       'outdoor_dry_bulb_temperature_predicted_3', 'diffuse_solar_irradiance',
       'diffuse_solar_irradiance_predicted_1',
       'diffuse_solar_irradiance_predicted_2',
       'diffuse_solar_irradiance_predicted_3', 'direct_solar_irradiance',
       'direct_solar_irradiance_predicted_1',
       'direct_solar_irradiance_predicted_2',
       'direct_solar_irradiance_predicted_3', 'carbon_intensity',
       'indoor_dry_bulb_temperature', 'non_shiftable_load', 'solar_generation',
       'dhw_storage_soc', 'electrical_storage_soc',
       'net_electricity_consumption', 'electricity_pricing',
       'electricity_pricing_predicted_1', 'electricity_pricing_predicted_2',
       'electricity_pricing_predicted_3', 'cooling_demand', 'dhw_demand',
       'occupant_count', 'indoor_dry_bulb_temperature_cool

In [None]:
print(final_df.isna().sum())

Building                                     0
Time Step                                    0
day_type                                     0
hour                                         0
outdoor_dry_bulb_temperature                 0
                                            ..
cooling_device_efficiency                    3
heating_device_efficiency                    3
dhw_device_efficiency                        3
indoor_dry_bulb_temperature_cooling_delta    3
indoor_dry_bulb_temperature_heating_delta    3
Length: 66, dtype: int64


In [None]:
# Display the first row as a dictionary to show all columns
print(final_df.iloc[0].to_dict())

{'Building': 0, 'Time Step': 0, 'day_type': 5, 'hour': 1, 'outdoor_dry_bulb_temperature': 24.65999984741211, 'outdoor_dry_bulb_temperature_predicted_1': 24.9106388092041, 'outdoor_dry_bulb_temperature_predicted_2': 38.415958404541016, 'outdoor_dry_bulb_temperature_predicted_3': 27.61146354675293, 'diffuse_solar_irradiance': 0.0, 'diffuse_solar_irradiance_predicted_1': 54.62592697143555, 'diffuse_solar_irradiance_predicted_2': 116.84288787841797, 'diffuse_solar_irradiance_predicted_3': 0.0, 'direct_solar_irradiance': 0.0, 'direct_solar_irradiance_predicted_1': 143.3243408203125, 'direct_solar_irradiance_predicted_2': 1020.756103515625, 'direct_solar_irradiance_predicted_3': 0.0, 'carbon_intensity': 0.4024883508682251, 'indoor_dry_bulb_temperature': 23.098651885986328, 'non_shiftable_load': 0.3568393290042877, 'solar_generation': 0.0, 'dhw_storage_soc': 0.0, 'electrical_storage_soc': 0.20000000298023224, 'net_electricity_consumption': 0.6778813600540161, 'electricity_pricing': 0.02892999

In [None]:
final_df.fillna(final_df.mean(), inplace=True)  # Replace NaN with the column mean

In [None]:
# Identify outliers using the IQR method
Q1 = final_df.quantile(0.25)
Q3 = final_df.quantile(0.75)
IQR = Q3 - Q1

# Filter out outliers (values outside 1.5 * IQR range)
final_df = final_df[~((final_df < (Q1 - 1.5 * IQR)) | (final_df > (Q3 + 1.5 * IQR))).any(axis=1)]

# Or clip outliers (e.g., keeping values between the 1st and 99th percentile)
final_df = final_df.clip(lower=final_df.quantile(0.01), upper=final_df.quantile(0.99), axis=1)

# Check the data after handling outliers
print(final_df.head())

   Building Time Step  day_type  hour  outdoor_dry_bulb_temperature  \
0         0       7.0         5     1                         24.66   
1         0       7.0         5     1                         24.66   
2         0       7.0         5     1                         24.66   
3         0       7.0         5     1                         24.66   
4         0       7.0         5     1                         24.66   

   outdoor_dry_bulb_temperature_predicted_1  \
0                                 24.910639   
1                                 24.910639   
2                                 24.910639   
3                                 24.910639   
4                                 24.910639   

   outdoor_dry_bulb_temperature_predicted_2  \
0                                 38.415958   
1                                 38.415958   
2                                 38.415958   
3                                 38.415958   
4                                 38.415958   

   outd

  final_df = final_df.clip(lower=final_df.quantile(0.01), upper=final_df.quantile(0.99), axis=1)
  final_df = final_df.clip(lower=final_df.quantile(0.01), upper=final_df.quantile(0.99), axis=1)


In [None]:
# Check unique hours in the dataset
unique_hours = final_df['hour'].unique()
print("Unique hours in dataset:", unique_hours)

# Check if any day has only 23 hours instead of 24
hour_counts = final_df.groupby(['Building', final_df['Time Step'] // 24])['hour'].nunique()
print("Minimum hours in a day:", hour_counts.min())
print("Maximum hours in a day:", hour_counts.max())

# If the dataset has some days with only 23 hours, it's likely due to daylight savings.


Unique hours in dataset: [1]
Minimum hours in a day: 1
Maximum hours in a day: 1


In [None]:
final_df.tail()

Unnamed: 0,Building,Time Step,day_type,hour,outdoor_dry_bulb_temperature,outdoor_dry_bulb_temperature_predicted_1,outdoor_dry_bulb_temperature_predicted_2,outdoor_dry_bulb_temperature_predicted_3,diffuse_solar_irradiance,diffuse_solar_irradiance_predicted_1,...,dhw_electricity_consumption,cooling_storage_electricity_consumption,heating_storage_electricity_consumption,dhw_storage_electricity_consumption,electrical_storage_electricity_consumption,cooling_device_efficiency,heating_device_efficiency,dhw_device_efficiency,indoor_dry_bulb_temperature_cooling_delta,indoor_dry_bulb_temperature_heating_delta
2155,2,712.0,5,1,24.66,24.910639,38.415958,27.611464,0.0,54.625927,...,0.0,0.0,0.0,0.0,0.0,3.954786958841088,3.511720008607768,0.904282,-0.012882,-0.012882
2156,2,712.0,5,1,24.66,24.910639,38.415958,27.611464,0.0,54.625927,...,0.0,0.0,0.0,0.0,0.0,3.954786958841088,3.511720008607768,0.904282,-0.012882,-0.012882
2157,2,712.0,5,1,24.66,24.910639,38.415958,27.611464,0.0,54.625927,...,0.0,0.0,0.0,0.0,0.0,3.954786958841088,3.511720008607768,0.904282,-0.012882,-0.012882
2158,2,712.0,5,1,24.66,24.910639,38.415958,27.611464,0.0,54.625927,...,0.0,0.0,0.0,0.0,0.0,3.954786958841088,3.511720008607768,0.904282,-0.012882,-0.012882
2159,2,712.0,5,1,24.66,24.910639,38.415958,27.611464,0.0,54.625927,...,0.0,0.0,0.0,0.0,0.0,3.954786958841088,3.511720008607768,0.904282,-0.012882,-0.012882


In [None]:
import torch
import numpy as np
import pandas as pd

# Step 1: Preprocess Data and Convert to Sequences
def preprocess_data(df):
    """
    Preprocess dataframe into time series sequences for training.
    We will use a window of time steps for each task.
    """
    # Select relevant features for training (You can modify this based on your needs)
    features = df[['hour', 'outdoor_dry_bulb_temperature', 'diffuse_solar_irradiance', 'direct_solar_irradiance','carbon_intensity', 'indoor_dry_bulb_temperature', 'non_shiftable_load', 'solar_generation',
       'dhw_storage_soc', 'electrical_storage_soc',
       'net_electricity_consumption', 'electricity_pricing',
       'electricity_pricing_predicted_1', 'electricity_pricing_predicted_2',
       'electricity_pricing_predicted_3', 'cooling_demand', 'dhw_demand',
       'occupant_count', 'indoor_dry_bulb_temperature_cooling_set_point',
       'month', 'heating_demand', 'daylight_savings_status',
       'average_unmet_cooling_setpoint_difference', 'indoor_relative_humidity',
       'indoor_dry_bulb_temperature_heating_set_point', 'power_outage',
       'comfort_band', 'indoor_dry_bulb_temperature_without_control',
       'cooling_demand_without_control', 'heating_demand_without_control',
       'dhw_demand_without_control', 'non_shiftable_load_without_control',
       'indoor_relative_humidity_without_control',
       'indoor_dry_bulb_temperature_cooling_set_point_without_control',
       'indoor_dry_bulb_temperature_heating_set_point_without_control',
       'hvac_mode', 'outdoor_relative_humidity',
       'outdoor_relative_humidity_predicted_1',
       'outdoor_relative_humidity_predicted_2',
       'outdoor_relative_humidity_predicted_3', 'cooling_storage_soc',
       'heating_storage_soc', 'cooling_electricity_consumption',
       'heating_electricity_consumption', 'dhw_electricity_consumption',
       'cooling_storage_electricity_consumption',
       'heating_storage_electricity_consumption',
       'dhw_storage_electricity_consumption',
       'electrical_storage_electricity_consumption',
       'cooling_device_efficiency', 'heating_device_efficiency',
       'dhw_device_efficiency', 'indoor_dry_bulb_temperature_cooling_delta',
       'indoor_dry_bulb_temperature_heating_delta']  # Example features
    labels = df[['net_electricity_consumption']]  # Target

    # Convert to numpy arrays
    features = features.to_numpy().astype(np.float32)
    labels = labels.to_numpy().astype(np.float32)

    return features, labels

# Load your dataset (replace 'your_data.csv' with the actual file name)
df = final_df  # Use the final_df directly from CityLearn

# Preprocess the data
features, labels = preprocess_data(df)
input_dim = features.shape[1]  # Number of features (columns)
output_dim = 1  # Output: net electricity consumption (or your target)


In [None]:
# Save the DataFrame to a CSV file
final_df.to_csv("final_dataframe.csv", index=False)

In [None]:
from google.colab import files
files.download('final_dataframe.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Normalize the features (except 'Building' column)
scaler = MinMaxScaler()
final_df_scaled = final_df.copy()

# Apply scaling to all columns except the 'Building' column
final_df_scaled[final_df.columns.difference(['Building'])] = scaler.fit_transform(final_df[final_df.columns.difference(['Building'])])

print(final_df_scaled.head())  # Check scaled DataFrame

   Building  day_type  hour  outdoor_dry_bulb_temperature  \
0         0       0.0   0.0                           0.0   
1         1       0.0   0.0                           0.0   
2         2       0.0   0.0                           0.0   

   outdoor_dry_bulb_temperature_predicted_1  \
0                                       0.0   
1                                       0.0   
2                                       0.0   

   outdoor_dry_bulb_temperature_predicted_2  \
0                                       0.0   
1                                       0.0   
2                                       0.0   

   outdoor_dry_bulb_temperature_predicted_3  diffuse_solar_irradiance  \
0                                       0.0                       0.0   
1                                       0.0                       0.0   
2                                       0.0                       0.0   

   diffuse_solar_irradiance_predicted_1  diffuse_solar_irradiance_predicted_2  \
0  

  return lib.map_infer(values, mapper, convert=convert)
  if is_sparse(pd_dtype):
  return lib.map_infer(values, mapper, convert=convert)
  if is_sparse(pd_dtype):


In [None]:
# Check for missing values
print(final_df.isnull().sum())

# Fill missing values with the column mean
final_df.fillna(final_df.mean(), inplace=True)

Building                                         0
day_type                                         0
hour                                             0
outdoor_dry_bulb_temperature                     0
outdoor_dry_bulb_temperature_predicted_1         0
outdoor_dry_bulb_temperature_predicted_2         0
outdoor_dry_bulb_temperature_predicted_3         0
diffuse_solar_irradiance                         0
diffuse_solar_irradiance_predicted_1             0
diffuse_solar_irradiance_predicted_2             0
diffuse_solar_irradiance_predicted_3             0
direct_solar_irradiance                          0
direct_solar_irradiance_predicted_1              0
direct_solar_irradiance_predicted_2              0
direct_solar_irradiance_predicted_3              0
carbon_intensity                                 0
indoor_dry_bulb_temperature                      0
non_shiftable_load                               0
solar_generation                                 0
dhw_storage_soc                

In [None]:
citylearn.wrappers.NormalizedObservationWrapper(env)

<NormalizedObservationWrapper<CityLearnEnv instance>>

In [None]:
citylearn.wrappers.ClippedObservationWrapper(env)

<ClippedObservationWrapper<CityLearnEnv instance>>

In [None]:
from stable_baselines3.sac import SAC as Agent
from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedObservationWrapper, StableBaselines3Wrapper

# initialize
env = CityLearnEnv('citylearn_challenge_2023_phase_2_local_evaluation', central_agent=True)
env = NormalizedObservationWrapper(env)
env = StableBaselines3Wrapper(env)
model = Agent('MlpPolicy', env)

# train
episodes = 5
model.learn(total_timesteps=env.unwrapped.time_steps*episodes)

# test
observations, _ = env.reset()

while not env.unwrapped.terminated:
    actions, _ = model.predict(observations, deterministic=True)
    observations, _, _, _, _ = env.step(actions)

kpis = env.unwrapped.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)

INFO:root:The citylearn_challenge_2023_phase_2_local_evaluation dataset DNE in cache. Will download from intelligent-environments-lab/CityLearn/tree/v2.3.0 GitHub repository and write to /root/.cache/citylearn/v2.3.0/datasets. Next time DataSet.get_dataset('citylearn_challenge_2023_phase_2_local_evaluation') is called, it will read from cache unless DataSet.clear_cache is run first.


name,Building_1,Building_2,Building_3,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
all_time_peak_average,,,,0.9
annual_normalized_unserved_energy_total,0.015,0.017,0.012,0.015
carbon_emissions_total,0.424,1.415,0.724,0.854
cost_total,0.397,1.418,0.715,0.843
daily_one_minus_load_factor_average,,,,1.092
daily_peak_average,,,,0.847
discomfort_cold_delta_average,0.0,2.029,0.013,0.681
discomfort_cold_delta_maximum,0.124,6.15,1.276,2.516
discomfort_cold_delta_minimum,0.0,0.0,0.0,0.0
discomfort_cold_proportion,0.0,0.647,0.0,0.216


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple feedforward neural network for continuous action space (9 actions)
class SimplePolicy(nn.Module):
    def __init__(self, input_size, output_size):
        super(SimplePolicy, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)  # First hidden layer
        self.fc2 = nn.Linear(128, 128)         # Second hidden layer
        self.fc3 = nn.Linear(128, output_size)  # Output layer (size 9)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)  # Output continuous actions

# Initialize the model with input size 49 (observation space) and output size 9 (action space)
model = SimplePolicy(49, 9)

# Test the model with a dummy input to ensure it works
dummy_input = torch.randn(1, 49)
dummy_output = model(dummy_input)
print(dummy_output)


tensor([[-0.0382, -0.0684,  0.0778, -0.0019,  0.0705,  0.1016, -0.1002,  0.0505,
         -0.0155]], grad_fn=<AddmmBackward0>)


In [None]:
# Define the path to your challenge dataset
dataset_path = "citylearn_challenge_2023_phase_1"  # Path to the dataset directory

# Initialize the CityLearnEnv with the path
env = CityLearnEnv(dataset_path)

# Initialize the model and Reptile agent
model = SimplePolicy(49, 9)
agent = ReptileMetaAgent(env, model)

# Train the agent
agent.train()

In [None]:
def evaluate(self):
    # Reset environment and get initial observation
    obs, info = self.env.reset()
    obs = np.array(obs) if isinstance(obs, list) else obs
    obs = torch.tensor(obs, dtype=torch.float32).unsqueeze(0) if len(obs.shape) == 1 else torch.tensor(obs, dtype=torch.float32)

    total_reward = 0
    for step in range(100):
        # Forward pass through the model to get the action
        action = self.model(obs)

        # Check the raw action values
        print(f"Step {step} - Raw Action (Before Sigmoid): {action.detach().numpy()}")

        # Ensure the action is within valid bounds (0, 1)
        action = torch.sigmoid(action)  # Normalizes action within [0, 1]

        # Clamping the action to ensure it's within [0, 1]
        action = torch.clamp(action, 0.0, 1.0)

        print(f"Step {step} - Action After Sigmoid & Clamping: {action.detach().numpy()}")

        action_numpy = action.detach().numpy()

        # Step the environment with the action
        output = self.env.step(action_numpy)
        print(f"Step {step} - Env Output: {output}")

        # Unpack the returned values from the environment
        if len(output) == 4:
            next_obs, reward, done, info = output
        elif len(output) == 5:
            next_obs, reward, done, info, extra = output
        else:
            raise ValueError(f"Unexpected return values from step(): {len(output)}")

        # Fix: Ensure reward is a scalar
        reward = reward[0] if isinstance(reward, (list, np.ndarray)) else reward

        # Track negative consumption and device output
        if "electricity_consumption" in info:
            if info["electricity_consumption"] < 0:
                print(f"❌ Negative electricity consumption detected at timestep {step}, setting to 0.")
                info["electricity_consumption"] = 0  # Clamp negative consumption to 0

        if "device_output" in info:
            if info["device_output"] < 0:
                print(f"❌ Negative device output detected at timestep {step}, setting to 0.")
                info["device_output"] = 0  # Clamp negative device output to 0

        # Print values for debugging
        print(f"Step {step} - Electricity Consumption: {info.get('electricity_consumption', 'Not Available')}")
        print(f"Step {step} - Device Output: {info.get('device_output', 'Not Available')}")

        # Prepare the next observation for the next step
        next_obs = torch.tensor(next_obs, dtype=torch.float32).unsqueeze(0) if len(next_obs.shape) == 1 else torc


In [None]:
# After training, evaluate the agent on the same environment (or a different one)
agent.evaluate()

Initial Observation Type: <class 'list'>
Initial Observation: [[5, 1, 24.66, 24.910639, 38.41596, 27.611464, 0.0, 54.625927, 116.84289, 0.0, 0.0, 143.32434, 1020.7561, 0.0, 0.40248835, 23.098652, 0.35683933, 0.0, 0.0, 0.2, 0.67788136, 0.02893, 0.02893, 0.02915, 0.02893, 1.1192156, 0.055682074, 3.0, 23.222221, 24.278513, 0.18733284, 0.0, 0.0, 0.2, 0.18733284, 0.0, 0.0, 1.0, 24.444445, 24.431562, 0.4220805, 0.0, 0.0, 0.2, 0.5631514, 0.5579055, 0.0, 2.0, 24.444445]]
Observation after conversion to numpy: (1, 49)
Observation after conversion to tensor: torch.Size([1, 49])
Observation with batch dimension: torch.Size([1, 49])
Returned from step() - ([[5, 2, 24.07, 27.076563, 38.31935, 23.5155, 0.0, 78.37548, 89.220505, 0.0, 0.0, 300.36105, 825.0973, 0.0, 0.38262463, 22.234743, 0.34507817, 0.0, 1.0, 0.961749, 6.531588, 0.02893, 0.02915, 0.02915, 0.02893, 1.4696382, 0.15933841466903687, 3.0, 22.222221, 24.264734, 0.18591776, 0.0, 0.18046626, 0.19998, 0.5235963, 0.0, 0.0, 1.0, 24.444445, 24.44

AssertionError: negative electricity consumption for cooling demand | timestep: 14, building: Building_1, device_output: -156.19017028808594, electricity_consumption: -68.61191650022172

In [None]:
class ReptileMetaAgent:
    def __init__(self, env, model, inner_lr=0.01, meta_lr=0.001, num_iterations=1000):
        self.env = env
        self.model = model
        self.inner_lr = inner_lr
        self.meta_lr = meta_lr
        self.num_iterations = num_iterations
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.meta_lr)

    def train(self):
        for iteration in range(self.num_iterations):
            meta_gradient = [torch.zeros_like(param) for param in self.model.parameters()]

            for task in range(5):  # Train over 5 different tasks (environments)
                task_model = SimplePolicy(49, 9)
                task_model.load_state_dict(self.model.state_dict())  # Copy model weights
                task_optimizer = optim.Adam(task_model.parameters(), lr=self.inner_lr)

                # Reset the environment
                obs, info = self.env.reset()  # Unpack observation and info
                print(f"Initial Observation Type: {type(obs)}")  # Debugging print
                print(f"Initial Observation: {obs}")  # Debugging print

                # Ensure the observation is converted to numpy or tensor
                if isinstance(obs, list):
                    obs = np.array(obs)  # Convert list to numpy array
                    print(f"Observation after conversion to numpy: {obs.shape}")  # Debugging print
                elif isinstance(obs, np.ndarray):
                    print(f"Observation is already a numpy array: {obs.shape}")  # Debugging print

                obs = torch.tensor(obs, dtype=torch.float32)  # Convert to torch tensor
                print(f"Observation after conversion to tensor: {obs.shape}")  # Debugging print

                # Add batch dimension if not present
                if len(obs.shape) == 1:
                    obs = obs.unsqueeze(0)
                print(f"Observation with batch dimension: {obs.shape}")  # Debugging print

                for step in range(1000):  # Inner loop for task-specific updates
                    action = task_model(obs)

                    # Take a step in the environment
                    # Check if more than 4 values are returned from step()
                    output = self.env.step(action.detach().numpy())
                    print(f"Returned from step() - {output}")  # Debugging print

                    # Unpack the returned values accordingly
                    if len(output) == 4:
                        next_obs, reward, done, info = output
                    elif len(output) == 5:
                        next_obs, reward, done, info, extra = output  # If 5 values are returned
                    else:
                        raise ValueError(f"Unexpected number of return values from step(): {len(output)}")
                    print(f"Next Observation Type: {type(next_obs)}")  # Debugging print
                    print(f"Next Observation: {next_obs}")  # Debugging print

                    # Handle the output of step()
                    if isinstance(next_obs, list):
                        next_obs = np.array(next_obs)  # Convert list to numpy array
                        print(f"Next Observation after conversion to numpy: {next_obs.shape}")  # Debugging print
                    elif isinstance(next_obs, np.ndarray):
                        print(f"Next Observation is already a numpy array: {next_obs.shape}")  # Debugging print

                    next_obs = torch.tensor(next_obs, dtype=torch.float32)  # Convert to torch tensor

                    # Add batch dimension if not present
                    if len(next_obs.shape) == 1:
                        next_obs = next_obs.unsqueeze(0)

                    print(f"Next Observation with batch dimension: {next_obs.shape}")  # Debugging print

                    # Ensure reward is a numeric value
                    if isinstance(reward, list):
                        reward = reward[0]  # Extract the first element if it's a list
                    elif isinstance(reward, np.ndarray):
                        reward = reward.item()  # If it's a numpy array, extract the scalar value

                    # Compute the loss (negative reward for simplicity)
                    loss = -reward
                    loss.backward()
                    task_optimizer.step()

                    if done:
                        break
                    obs = next_obs  # Move to the next observation

                # Compute the gradient of the loss for the meta-update
                task_gradient = torch.autograd.grad(loss, task_model.parameters())
                for i, grad in enumerate(task_gradient):
                    meta_gradient[i] += grad

            # Meta-update step
            meta_gradient = [grad / 5 for grad in meta_gradient]
            for param, grad in zip(self.model.parameters(), meta_gradient):
                param.grad = grad
            self.optimizer.step()

            if iteration % 100 == 0:
                print(f"Iteration {iteration}, Meta Loss: {loss.item()}")

    def evaluate(self):
        # Evaluate the agent on a new task/environment
        obs, info = self.env.reset()  # Unpack observation and info
        print(f"Initial Observation Type: {type(obs)}")  # Debugging print
        print(f"Initial Observation: {obs}")  # Debugging print

        # Ensure the observation is converted to numpy or tensor
        if isinstance(obs, list):
            obs = np.array(obs)  # Convert list to numpy array
            print(f"Observation after conversion to numpy: {obs.shape}")  # Debugging print
        elif isinstance(obs, np.ndarray):
            print(f"Observation is already a numpy array: {obs.shape}")  # Debugging print

        obs = torch.tensor(obs, dtype=torch.float32)  # Convert to torch tensor
        print(f"Observation after conversion to tensor: {obs.shape}")  # Debugging print

        # Add batch dimension if not present
        if len(obs.shape) == 1:
            obs = obs.unsqueeze(0)
        print(f"Observation with batch dimension: {obs.shape}")  # Debugging print

        total_reward = 0
        for step in range(1000):
            action = self.model(obs)
            # Check if more than 4 values are returned from step()
            output = self.env.step(action.detach().numpy())
            print(f"Returned from step() - {output}")  # Debugging print
            # Unpack the returned values accordingly
            if len(output) == 4:
                  next_obs, reward, done, info = output
            elif len(output) == 5:
                  next_obs, reward, done, info, extra = output  # If 5 values are returned
            else:
                  raise ValueError(f"Unexpected number of return values from step(): {len(output)}")

            next_obs = torch.tensor(next_obs, dtype=torch.float32)

            # Add batch dimension if not present
            if len(next_obs.shape) == 1:
                next_obs = next_obs.unsqueeze(0)

            total_reward += reward
            if done:
                break
            obs = next_obs
        print(f"Final reward: {total_reward}")


In [None]:
import shimmy

In [None]:
pip install 'shimmy>=2.0'

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import copy

class MAMLModel(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim=64):
        super(MAMLModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def copy(self):
        return copy.deepcopy(self)

def compute_loss(model, env):
    """Compute loss for a given environment/task."""
    state = env.reset()
    total_loss = 0
    for _ in range(10):  # Simulate a few steps
        state_tensor = torch.tensor(state, dtype=torch.float32)
        action = model(state_tensor)  # Assume continuous action space
        next_state, reward, done, _ = env.step(action.detach().numpy())
        loss = -reward  # Negative reward as loss (to maximize reward)
        total_loss += loss
        if done:
            break
        state = next_state
    return total_loss

class MAML:
    def __init__(self, model, inner_lr=0.01, meta_lr=0.001, num_iterations=1000):
        self.model = model
        self.inner_lr = inner_lr
        self.meta_lr = meta_lr
        self.num_iterations = num_iterations
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.meta_lr)

    def train(self, envs):
        for iteration in range(self.num_iterations):
            meta_gradient = [torch.zeros_like(param) for param in self.model.parameters()]

            for env in envs:  # Train over different tasks
                task_model = self.model.copy()
                task_optimizer = optim.SGD(task_model.parameters(), lr=self.inner_lr)

                task_loss = compute_loss(task_model, env)

                task_optimizer.zero_grad()
                task_loss.backward()
                task_optimizer.step()

                for i, param in enumerate(task_model.parameters()):
                    meta_gradient[i] += param.grad

            # Meta-update step
            meta_gradient = [grad / len(envs) for grad in meta_gradient]
            for param, grad in zip(self.model.parameters(), meta_gradient):
                param.grad = grad
            self.optimizer.step()

            if iteration % 100 == 0:
                print(f"Iteration {iteration}, Meta Loss: {task_loss.item()}")

In [None]:
# Example usage:
# Assuming `envs` is a list of CityLearn environments
maml_model = MAMLModel(input_dim=10, output_dim=5)  # Adjust input/output dimensions
maml = MAML(maml_model)
maml.train(envs)

In [None]:
from citylearn.agents.base import BaselineAgent as Agent
from citylearn.citylearn import CityLearnEnv

# initialize
env = CityLearnEnv('citylearn_challenge_2023_phase_2_local_evaluation', central_agent=True)
model = Agent(env)

# step through environment and apply agent actions
observations, _ = env.reset()

while not env.terminated:
    actions = model.predict(observations)
    observations, reward, info, terminated, truncated = env.step(actions)

# test
kpis = model.env.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)

INFO:root:The citylearn_challenge_2023_phase_2_local_evaluation dataset DNE in cache. Will download from intelligent-environments-lab/CityLearn/tree/v2.3.0 GitHub repository and write to /root/.cache/citylearn/v2.3.0/datasets. Next time DataSet.get_dataset('citylearn_challenge_2023_phase_2_local_evaluation') is called, it will read from cache unless DataSet.clear_cache is run first.
  self.load_state_dict(torch.load(self.filepath)['model_state_dict'])
  gym.logger.warn(
  gym.logger.warn(


name,Building_1,Building_2,Building_3,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
all_time_peak_average,,,,1.0
annual_normalized_unserved_energy_total,0.019,0.018,0.018,0.018
carbon_emissions_total,1.0,1.0,1.0,1.0
cost_total,1.0,1.0,1.0,1.0
daily_one_minus_load_factor_average,,,,1.0
daily_peak_average,,,,1.0
discomfort_cold_delta_average,1.657,0.045,0.675,0.793
discomfort_cold_delta_maximum,4.878,1.793,3.642,3.438
discomfort_cold_delta_minimum,0.0,0.0,0.0,0.0
discomfort_cold_proportion,0.369,0.0,0.095,0.155


In [None]:
import gym
import citylearn
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback

In [None]:
env = NormalizedObservationWrapper(env)
env = NormalizedActionWrapper(env)

env1 = StableBaselines3ObservationWrapper(env)
env = DummyVecEnv([lambda: env1])

  gym.logger.warn(
  gym.logger.warn(


In [None]:
print('All CityLearn datasets:', sorted(DataSet().get_dataset_names()))

All CityLearn datasets: ['baeda_3dem', 'ca_alameda_county_neighborhood', 'citylearn_challenge_2020_climate_zone_1', 'citylearn_challenge_2020_climate_zone_2', 'citylearn_challenge_2020_climate_zone_3', 'citylearn_challenge_2020_climate_zone_4', 'citylearn_challenge_2021', 'citylearn_challenge_2022_phase_1', 'citylearn_challenge_2022_phase_2', 'citylearn_challenge_2022_phase_3', 'citylearn_challenge_2022_phase_all', 'citylearn_challenge_2022_phase_all_plus_evs', 'citylearn_challenge_2023_phase_1', 'citylearn_challenge_2023_phase_2_local_evaluation', 'citylearn_challenge_2023_phase_2_online_evaluation_1', 'citylearn_challenge_2023_phase_2_online_evaluation_2', 'citylearn_challenge_2023_phase_2_online_evaluation_3', 'citylearn_challenge_2023_phase_3_1', 'citylearn_challenge_2023_phase_3_2', 'citylearn_challenge_2023_phase_3_3', 'quebec_neighborhood_with_demand_response_set_points', 'quebec_neighborhood_without_demand_response_set_points', 'tx_travis_county_neighborhood', 'vt_chittenden_co

In [None]:
dataset_path = "citylearn_challenge_2023_phase_1"  # Path to the dataset directory

# Initialize the CityLearnEnv with the path
env = CityLearnEnv(dataset_path)

In [None]:
from citylearn.agents.sac import SAC as Agent
from citylearn.citylearn import CityLearnEnv

# Define which buildings to train on (inside phase 1 dataset)
building_scenarios = [
    [0],  # Train on Building 0
    [1],  # Train on Building 1
    [2]   # Train on Building 2
]

models = []

for building_set in building_scenarios:
    # Initialize CityLearn Environment with specific buildings
    env = CityLearnEnv(schema='citylearn_challenge_2023_phase_1', building_ids=building_set, central_agent=False)

    # Train SAC Agent
    model = Agent(env)
    model.learn(episodes=5, deterministic_finish=True)

    # Store trained models for later adaptation
    models.append(model)

print("Meta-RL training completed across multiple buildings!")


Meta-RL training completed across multiple buildings!


In [None]:
from citylearn.agents.sac import SAC as Agent
from citylearn.citylearn import CityLearnEnv

# Define which buildings to train on (inside phase 1 dataset)
building_scenarios = [
    [0],  # Train on Building 0
    [1],  # Train on Building 1
    [2]   # Train on Building 2
]

models2 = []

for building_set in building_scenarios:
    # Initialize CityLearn Environment with specific buildings
    env = CityLearnEnv(schema='citylearn_challenge_2023_phase_2_online_evaluation_1',
                        building_ids=building_set, central_agent=False)

    # Get observation shape (for first building)
    obs_shape = env.observation_space[0].shape[0]  # Correct way to access observation shape
    print(f"Training on Buildings: {building_set}, Observation Size: {obs_shape}")

    # Train SAC Agent
    model2 = Agent(env)
    model2.learn(episodes=5, deterministic_finish=True)

    # Store trained models for later adaptation
    models2.append(model2)

print("✅ Meta-RL training completed across multiple buildings!")


Training on Buildings: [0], Observation Size: 30
Training on Buildings: [1], Observation Size: 30
Training on Buildings: [2], Observation Size: 30
✅ Meta-RL training completed across multiple buildings!


In [None]:
# Load an unseen building (Building 3)
test_env = CityLearnEnv(schema='citylearn_challenge_2023_phase_2_online_evaluation_1', building_ids=[2], central_agent=False)

# Use one of the trained models (example: model trained on Building 0)
meta_model = model2[0]

# Reset environment
obs, _ = test_env.reset()
done = False

while not done:
    action, _ = meta_model.predict(obs, deterministic=True)
    obs, _, done, _, _ = test_env.step(action)

# Evaluate KPIs for the unseen building
kpis = test_env.evaluate()
kpis = kpis.pivot(index="cost_function", columns="name", values="value").round(3)
kpis = kpis.dropna(how="all")

# Display KPIs
print("KPIs for Unseen Building:")
print(kpis)


ValueError: operands could not be broadcast together with shapes (29,) (30,) 

In [None]:
# Define all buildings at once to ensure consistent observation space
train_env = CityLearnEnv(schema='citylearn_challenge_2023_phase_1', central_agent=False)

# Use the same environment observation space for all test environments
test_env = CityLearnEnv(schema='citylearn_challenge_2023_phase_1', building_ids=[3], central_agent=False)

# Set the test environment's observation space to match the training environment
test_env.observation_space = train_env.observation_space


AttributeError: property 'observation_space' of 'CityLearnEnv' object has no setter