In [1]:
# Run this again after editing submodules so Colab uses the updated versions
from citylearn import CityLearn
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from TD3 import TD3 as Agent

import sys
import warnings
import time

# Mute every warning for cleaner cell output, but only when the interpreter was
# started without explicit -W options (in that case the user's choice wins).
if len(sys.warnoptions) == 0:
    warnings.simplefilter(action="ignore")

# Environment configuration: every keyword argument handed to CityLearn(**params).
climate_zone = 5
params = dict(
    # Folder holding the data files of the selected climate zone.
    data_path=Path("data") / f"Climate_Zone_{climate_zone}",
    building_attributes="building_attributes.json",
    weather_file="weather_data.csv",
    solar_profile="solar_generation_1kW.csv",
    carbon_intensity="carbon_intensity.csv",
    # Building_1 ... Building_9
    building_ids=[f"Building_{number}" for number in range(1, 10)],
    buildings_states_actions="buildings_state_action_space.json",
    # First and last simulated step; 8760 is the number of hours in a 365-day
    # year, so this spans four of them (presumably hourly steps — confirm).
    simulation_period=(0, 4 * 8760 - 1),
    cost_function=[
        "ramping",
        "1-load_factor",
        "average_daily_peak",
        "peak_demand",
        "net_electricity_consumption",
        "carbon_emissions",
    ],
    central_agent=False,
    save_memory=False,
)

# Build the CityLearn environment from the configuration in `params`.
env = CityLearn(**params)
# Lower and upper bounds of the states and actions, to be provided to the agent
# so it can normalize the variables between 0 and 1.
observations_spaces, actions_spaces = env.get_state_action_spaces()

# Information on building type, climate zone, annual DHW demand, annual cooling
# demand, annual electricity demand, solar capacity, and correlations among buildings.
building_info = env.get_building_information()


# Environment description packaged as keyword arguments for a control agent.
# The building ids and the state/action-space file are taken from `params`
# (the configuration the environment was built with) instead of being typed out
# a second time, so the agent-side values cannot drift from the environment's.
params_agent = {
    # Copied so later edits to one list do not silently change the other.
    "building_ids": list(params["building_ids"]),
    "buildings_states_actions": params["buildings_states_actions"],
    "building_info": building_info,
    "observation_spaces": observations_spaces,
    "action_spaces": actions_spaces,
}

In [2]:
# Instantiate the control agent(s).
# 336 steps == 2 weeks at 24 steps per day.
RBC_THRESHOLD = 2 * 7 * 24

# One agent object drives all buildings; `params_agent` is not used by this call.
# NOTE(review): `num_actions` receives the action *spaces* rather than a count —
# confirm this matches the TD3 constructor's expectation.
agents = Agent(
    rbc_threshold=RBC_THRESHOLD,
    num_buildings=len(observations_spaces),
    num_actions=actions_spaces,
)

# Last time step to simulate: RBC_THRESHOLD steps followed by a further
# 30 days of 24 steps each.
end_time = RBC_THRESHOLD + 24 * 30

# Column of the state array that is logged at every step.
# NOTE(review): presumably the grid-electricity feature, going by the name
# `true_E_grid` — confirm against the state definition.
E_GRID_STATE_INDEX = 28

# perf_counter() is monotonic, so the measured duration is not distorted by
# system clock adjustments the way time.time() differences can be.
start_time = time.perf_counter()

state = env.reset()
done = False

# select_action() returns a sequence; its first element is the action passed to env.step().
action = agents.select_action(state)[0]

true_E_grid = []
try:
    while not done and env.time_step < end_time:
        next_state, reward, done, _ = env.step(action)
        # Choose the follow-up action before storing the transition, as the original loop did.
        action_next = agents.select_action(next_state)[0]
        agents.add_to_buffer(state, action, reward, next_state, done)
        # Log the selected column of the state the action was taken in.
        true_E_grid.append(state[:, E_GRID_STATE_INDEX])
        state = next_state
        action = action_next

        print(f"\rTime step: {env.time_step}", end="")
finally:
    # Runs on normal completion and on interruption (e.g. KeyboardInterrupt),
    # so the timing is never lost; any exception still propagates afterwards.
    # The day count uses the step actually reached, not the planned end.
    print(
        f"\nTotal time to run {env.time_step // 24} days (sec): "
        f"{time.perf_counter() - start_time}"
    )

Time step: 895
Solving using SCS at t = 8 for building 4
Time step: 935

KeyboardInterrupt: 