In [7]:
# Run this again after editing submodules so Colab uses the updated versions
from citylearn import CityLearn
from citylearn import GridLearn
import matplotlib.pyplot as plt
from pathlib import Path
from citylearn import RL_Agents_Coord, Cluster_Agents
import numpy as np                                                                                                                                                                                      
import csv
import time
import re
import pandas as pd
import torch
from joblib import dump, load

# Load environment
climate_zone = 1
data_path = Path("citylearn/data/Climate_Zone_"+str(climate_zone))
building_attributes = data_path / 'building_attributes.json'
weather_file = data_path / 'weather_data.csv'
solar_profile = data_path / 'solar_generation_1kW.csv'
building_state_actions = 'citylearn/buildings_state_action_space.json'
building_id = ["Building_1","Building_2","Building_3","Building_4","Building_5","Building_6","Building_7","Building_8","Building_9"]
objective_function = ['ramping','1-load_factor','average_daily_peak','peak_demand','net_electricity_consumption','quadratic','voltage_dev']

ep_period = 10

print("Initializing the grid...")
# Contain the lower and upper bounds of the states and actions, to be provided to the agent to normalize the variables between 0 and 1.
# Can be obtained using observations_spaces[i].low or .high
env = GridLearn(data_path, building_attributes, weather_file, solar_profile, building_id, 1, buildings_states_actions = building_state_actions, simulation_period = (0,ep_period), cost_function = objective_function, verbose=1, n_buildings_per_bus=1)

# Hyperparameters
batch_size = 254
bs = batch_size
tau = 0.005
gamma = 0.99
lr = 0.0003
hid = [batch_size,batch_size]

n_episodes = 5
n_training_eps = n_episodes - 1

if not (batch_size < ep_period * n_training_eps):
    print("will produce a key error because the neural nets won't be initialized yet")

print("Initializing the agents...")
# Instantiating the control agent(s)
agents = RL_Agents_Coord(env, list(env.buildings.keys()), discount = gamma, batch_size = bs, replay_buffer_capacity = 1e5, regression_buffer_capacity = 12*ep_period, tau=tau, lr=lr, hidden_dim=hid, start_training=(ep_period+1)*(n_episodes-1), exploration_period = (ep_period+1)*(n_episodes)+1,  start_regression=(ep_period+1), information_sharing = True, pca_compression = .95, action_scaling_coef=0.5, reward_scaling = 5., update_per_step = 1, iterations_as = 2)

Initializing the grid...


  self.state = np.array(self.state)


Initializing the agents...
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35


In [8]:
print("Starting the experiment...")
# The number of episodes can be replaces by a stopping criterion (i.e. convergence of the average reward)
start = time.time()
for e in range(n_episodes):
    is_evaluating = (e > n_training_eps) # Evaluate deterministic policy after 7 epochs
    rewards = []
    state = env.reset()
    done = False

    j = 0
    
    print("is_deterministic", is_evaluating)
    action, coordination_vars = agents.select_action(state, deterministic=is_evaluating)
#     print(action)
    while not done:
        next_state, reward, done, _ = env.step(action)
        action_next, coordination_vars_next = agents.select_action(next_state, deterministic=is_evaluating)
        agents.add_to_buffer(state, action, reward, next_state, done, coordination_vars, coordination_vars_next)

        state = next_state
        coordination_vars = coordination_vars_next
        action = action_next

    print('Loss -',env.cost(), 'Simulation time (min) -',(time.time()-start)/60.0)

Starting the experiment...


  self.state = np.array(self.state)


is_deterministic False
1
2
3
4
5
6
7
8
9
10
Cumulated reward: 19.81360799389357
11


  self.state = np.array(self.state)
  self.state = np.array(self.state)


Loss - {'ramping': 1.0649052339710097, '1-load_factor': 0.8042406467634421, 'average_daily_peak': 1.7071949229751466, 'peak_demand': 1.7071949229751466, 'net_electricity_consumption': 1.6352639686566997, 'voltage_dev': 1.5247893862922681, 'quadratic': 2.6770741568910794, 'total': 1.588666176932113} Simulation time (min) - 0.5289287010828654
is_deterministic False
12
13
14
15
16
17
18
19
20
21
Cumulated reward: 21.766367737180445
22
Loss - {'ramping': 1.2636191884306702, '1-load_factor': 0.697389730411083, 'average_daily_peak': 1.6151286444670008, 'peak_demand': 1.6151286444670008, 'net_electricity_consumption': 1.690290301598393, 'voltage_dev': 1.6528219741693588, 'quadratic': 2.8503065975967985, 'total': 1.6263835830200435} Simulation time (min) - 0.7063748995463054


  self.state = np.array(self.state)


is_deterministic False
23
24
25
26
27
28
29
30
31
32
Cumulated reward: 22.148975578151706
33
Loss - {'ramping': 1.2590716864080573, '1-load_factor': 0.7016823741736532, 'average_daily_peak': 1.6303569842428975, 'peak_demand': 1.6303569842428975, 'net_electricity_consumption': 1.6505671718217774, 'voltage_dev': 1.6614692965645055, 'quadratic': 2.728320408437959, 'total': 1.6088321294131067} Simulation time (min) - 0.8638240536053975


  self.state = np.array(self.state)


is_deterministic False
34
35
36
37
38
39
40
41
42
43
Cumulated reward: 20.60639277789683
44
Loss - {'ramping': 1.1685638181405889, '1-load_factor': 0.7594803396301438, 'average_daily_peak': 1.7273694926026353, 'peak_demand': 1.7273694926026353, 'net_electricity_consumption': 1.6729339577242317, 'voltage_dev': 1.5874456110880018, 'quadratic': 2.809743776556291, 'total': 1.6361294983349326} Simulation time (min) - 1.0325966676076253


  self.state = np.array(self.state)


is_deterministic False
45
46
47
48
49
50
51
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
normalizing...
(35, 37)
52
53
54
Cumulated reward: 21.30036001440004
55
Loss - {'ramping': 0.9889972643140278, '1-load_factor': 0.6925399084788022, 'average_daily_peak': 1.6665008845105223, 'peak_demand': 1.66650

In [None]:
np.arange(10).shape

In [None]:
x = [j[0] for j in agents.replay_buffer['JTMN9'].buffer]
len(x[0])

In [None]:
import gym
import numpy as np
from stable_baselines3.sac.policies import MlpPolicy
from stable_baselines3 import SAC
from citylearn import  CityLearn
import matplotlib.pyplot as plt
from pathlib import Path
import time
from gridlearn import GridLearn

In [None]:
from gym.envs import register

climate_zone = 1
data_path = Path("data/Climate_Zone_"+str(climate_zone))
config = {
    'data_path': data_path,
    'building_attributes': data_path / 'building_attributes.json',
    'buildings_states_actions': 'buildings_state_action_space.json',
    'weather_file': data_path / 'weather_data.csv',
    'solar_profile': data_path / 'solar_generation_1kW.csv',
    'building_ids': ['Building_3'],
    'hourly_timesteps': 3,
    'central_agent':True,
    'cost_function':['ramping','1-load_factor','average_daily_peak','peak_demand','net_electricity_consumption','quadratic']
}

env_name = 'MyEnv-v1'

register(id=env_name,
     entry_point='gridlearn:GridLearn',
     max_episode_steps=8760,
     kwargs = config)

env = gym.make(env_name)

In [None]:
model = SAC(MlpPolicy, env, verbose=0, learning_rate=0.01, gamma=0.99, tau=3e-4, batch_size=64, learning_starts=8759)
start = time.time()
print("starting learning")
model.learn(total_timesteps=20, log_interval=1000)
print(time.time()-start)

obs = env.reset()
dones = False
counter = []
print("starting evaluation")
while dones==False:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    counter.append(rewards)
env.cost()

In [None]:
obs = env.observation_space

In [None]:
obs

In [None]:
env.n_buildings

In [None]:
type({"a":1}) ==dict
