In [1]:
from citylearn import  CityLearn
from pathlib import Path
from agent import RL_Agents
import numpy as np

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda

import gym
import argparse
import numpy as np

In [2]:
gamma = 0.95          # discount factor applied to the critic's next-state value in Agent.td_target
update_interval = 5   # number of collected transitions that triggers an actor/critic update in Agent.train
actor_lr = 0.0005     # Adam learning rate used by Actor
critic_lr = 0.001     # Adam learning rate used by Critic

"""
A2C 
"""

'\nA2C \n'

In [3]:
class Actor:
    """Gaussian policy network for a continuous-action actor-critic agent.

    The Keras model maps a state vector to two heads of size ``action_dim``:
    a mean (tanh output scaled by ``action_bound``) and a standard deviation
    (softplus output). The standard deviation is clipped to ``std_bound``
    both when sampling actions and when evaluating their log-probability.

    Args:
        state_dim: Length of the state vector fed to the network.
        action_dim: Number of action components produced by each head.
        action_bound: Multiplier applied to the tanh mean output.
        std_bound: Two-element sequence ``[min_std, max_std]`` used to clip
            the predicted standard deviation.
    """

    def __init__(self, state_dim, action_dim, action_bound, std_bound):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_bound = action_bound
        self.std_bound = std_bound
        self.model = self.create_model()
        self.opt = tf.keras.optimizers.Adam(actor_lr)

    def create_model(self):
        """Build the two-headed (mean, std) policy network."""
        state_input = Input((self.state_dim,))
        dense_1 = Dense(32, activation='relu')(state_input)
        dense_2 = Dense(32, activation='relu')(dense_1)
        out_mu = Dense(self.action_dim, activation='tanh')(dense_2)
        # tanh yields values in [-1, 1]; scale them by action_bound.
        mu_output = Lambda(lambda x: x * self.action_bound)(out_mu)
        std_output = Dense(self.action_dim, activation='softplus')(dense_2)
        return tf.keras.models.Model(state_input, [mu_output, std_output])

    def get_action(self, state):
        """Sample one action for a single state.

        Args:
            state: Array-like holding ``state_dim`` values.

        Returns:
            NumPy array of ``action_dim`` samples drawn from
            ``Normal(mu, clip(std, *std_bound))``.
        """
        state = np.reshape(state, [1, self.state_dim])
        mu, std = self.model.predict(state)
        mu, std = mu[0], std[0]
        # Clip exactly as log_pdf does, so the distribution actions are drawn
        # from is the same one whose log-probability drives the gradient.
        std = np.clip(std, self.std_bound[0], self.std_bound[1])
        return np.random.normal(mu, std, size=self.action_dim)

    def log_pdf(self, mu, std, action):
        """Return the Gaussian log-density of ``action``, summed over axis 1.

        The result keeps its second axis (``keepdims=True``).
        """
        std = tf.clip_by_value(std, self.std_bound[0], self.std_bound[1])
        var = std ** 2
        log_policy_pdf = -0.5 * (action - mu) ** 2 / \
            var - 0.5 * tf.math.log(var * 2 * np.pi)
        return tf.reduce_sum(log_policy_pdf, 1, keepdims=True)

    def compute_loss(self, mu, std, actions, advantages):
        """Policy-gradient loss: negated sum of log-probability * advantage."""
        log_policy_pdf = self.log_pdf(mu, std, actions)
        loss_policy = log_policy_pdf * advantages
        return tf.reduce_sum(-loss_policy)

    def train(self, states, actions, advantages):
        """Apply one Adam step on the policy loss and return that loss."""
        with tf.GradientTape() as tape:
            mu, std = self.model(states, training=True)
            loss = self.compute_loss(mu, std, actions, advantages)
        grads = tape.gradient(loss, self.model.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.model.trainable_variables))
        return loss


In [4]:
class Critic:
    """State-value network fitted to TD targets with a mean-squared-error loss.

    Args:
        state_dim: Length of the state vector fed to the network.
    """

    def __init__(self, state_dim):
        self.state_dim = state_dim
        self.model = self.create_model()
        self.opt = tf.keras.optimizers.Adam(critic_lr)

    def create_model(self):
        """Build the value network: three ReLU layers (32, 32, 16) and a linear scalar output."""
        stack = [Input((self.state_dim,))]
        for units in (32, 32, 16):
            stack.append(Dense(units, activation='relu'))
        stack.append(Dense(1, activation='linear'))
        return tf.keras.Sequential(stack)

    def compute_loss(self, v_pred, td_targets):
        """Mean squared error between the TD targets and the predicted values."""
        return tf.keras.losses.MeanSquaredError()(td_targets, v_pred)

    def train(self, states, td_targets):
        """Apply one Adam step on the value loss and return that loss."""
        with tf.GradientTape() as tape:
            v_pred = self.model(states, training=True)
            # Guard against silent broadcasting inside the loss.
            assert v_pred.shape == td_targets.shape
            # Targets are treated as constants.
            fixed_targets = tf.stop_gradient(td_targets)
            loss = self.compute_loss(v_pred, fixed_targets)
        params = self.model.trainable_variables
        self.opt.apply_gradients(zip(tape.gradient(loss, params), params))
        return loss

In [5]:
class Agent:
    """Advantage actor-critic (A2C) training driver around one environment.

    Args:
        building_info: Building metadata, stored unchanged on the instance.
        observation_spaces: Optional sequence; when given, its length is used
            as the state dimension. When omitted, the dimension is taken from
            ``env.observation_space.shape[0]``.
        action_spaces: Optional sequence; when given, its length is used as
            the action dimension. When omitted, the dimension is taken from
            ``env.action_space.shape[0]``.
        env: Environment exposing ``reset()``, ``step(action)`` and
            ``action_space.high``. When omitted, the module-level ``env`` is
            used, which is what the original implementation always did.

    Raises:
        NameError: If ``env`` is not passed and no module-level ``env`` exists.
    """

    def __init__(self, building_info, observation_spaces = None, action_spaces = None, env = None):
        self.building_info = building_info # Can be used to create different RL agents based on basic building attributes or climate zones

        if env is None:
            # Backward-compatible fallback to the notebook-level environment.
            env = globals().get('env')
            if env is None:
                raise NameError("name 'env' is not defined; pass env=... to Agent")
        self.env = env

        # NOTE(review): with a list of per-building spaces, len() is the number
        # of buildings, not the length of a state/action vector -- confirm that
        # this is the intended network input/output size.
        if observation_spaces is not None:
            self.state_dim = len(observation_spaces)
        else:
            self.state_dim = self.env.observation_space.shape[0]
        if action_spaces is not None:
            self.action_dim = len(action_spaces)
        else:
            self.action_dim = self.env.action_space.shape[0]
        self.action_bound = self.env.action_space.high[0]
        self.std_bound = [1e-2, 1.0]

        self.actor = Actor(self.state_dim, self.action_dim,
                           self.action_bound, self.std_bound)
        self.critic = Critic(self.state_dim)

    def td_target(self, reward, next_state, done):
        """Return the one-step TD target as a ``(1, 1)`` array.

        For a terminal transition the target is the reward itself; otherwise
        it is ``reward + gamma * V(next_state)`` using the critic's prediction.
        """
        if done:
            # Same shape as the non-terminal branch so batches stack cleanly.
            return np.reshape(reward, [1, 1])
        v_value = self.critic.model.predict(
            np.reshape(next_state, [1, self.state_dim]))
        return np.reshape(reward + gamma * v_value[0], [1, 1])

    def advantage(self, td_targets, baselines):
        """Return ``td_targets - baselines``."""
        return td_targets - baselines

    def advatnage(self, td_targets, baselines):
        """Original (misspelled) name, kept so existing callers keep working."""
        return self.advantage(td_targets, baselines)

    def list_to_batch(self, list):
        """Stack a non-empty sequence of arrays along axis 0.

        The parameter name shadows the ``list`` builtin; it is kept unchanged
        so keyword callers are not broken.
        """
        return np.concatenate(list, axis=0)

    def train(self, max_episodes=1, reward_shift=8, reward_scale=8):
        """Run ``max_episodes`` episodes, updating actor and critic as it goes.

        Transitions are buffered and both networks are updated whenever
        ``update_interval`` transitions have been collected or the episode
        ends. The raw episode reward is printed after each episode.

        Args:
            max_episodes: Number of episodes to run.
            reward_shift: Added to the reward before it enters the TD target.
            reward_scale: Divisor applied after the shift. The defaults
                reproduce the original hard-coded ``(reward + 8) / 8``.
                NOTE(review): this looks tuned for the Pendulum-v0 environment
                used elsewhere in this notebook -- confirm it suits other
                environments.
        """
        for ep in range(max_episodes):
            state_batch = []
            action_batch = []
            td_target_batch = []
            advantage_batch = []
            episode_reward, done = 0, False

            state = self.env.reset()

            while not done:
                # self.env.render()
                action = self.actor.get_action(state)
                action = np.clip(action, -self.action_bound, self.action_bound)

                next_state, reward, done, _ = self.env.step(action)

                state = np.reshape(state, [1, self.state_dim])
                action = np.reshape(action, [1, self.action_dim])
                next_state = np.reshape(next_state, [1, self.state_dim])
                reward = np.reshape(reward, [1, 1])

                td_target = self.td_target(
                    (reward + reward_shift) / reward_scale, next_state, done)
                advantage = self.advantage(
                    td_target, self.critic.model.predict(state))

                state_batch.append(state)
                action_batch.append(action)
                td_target_batch.append(td_target)
                advantage_batch.append(advantage)

                if len(state_batch) >= update_interval or done:
                    states = self.list_to_batch(state_batch)
                    actions = self.list_to_batch(action_batch)
                    td_targets = self.list_to_batch(td_target_batch)
                    advantages = self.list_to_batch(advantage_batch)

                    self.actor.train(states, actions, advantages)
                    self.critic.train(states, td_targets)

                    state_batch = []
                    action_batch = []
                    td_target_batch = []
                    advantage_batch = []

                # Accumulate the unnormalised reward for reporting.
                episode_reward += reward[0][0]
                state = next_state[0]

            print('EP{} EpisodeReward={}'.format(ep, episode_reward))


In [98]:
# Pick the climate zone; every input file below lives in that zone's data folder
climate_zone = 1
data_path = Path("data/Climate_Zone_"+str(climate_zone))
solar_profile = data_path / 'solar_generation_1kW.csv'
weather_file = data_path / 'weather_data.csv'
building_attributes = data_path / 'building_attributes.json'
building_state_actions = 'buildings_state_action_space.json'

# Buildings to simulate and the cost terms passed to the environment
building_ids = [
    "Building_1", "Building_2", "Building_3",
    "Building_4", "Building_5", "Building_6",
    "Building_7", "Building_8", "Building_9",
]
objective_function = [
    'ramping',
    '1-load_factor',
    'average_daily_peak',
    'peak_demand',
    'net_electricity_consumption',
]

# Create the environment and read its state/action spaces
env = CityLearn(
    data_path,
    building_attributes,
    weather_file,
    solar_profile,
    building_ids,
    buildings_states_actions=building_state_actions,
    cost_function=objective_function,
)
observations_spaces, actions_spaces = env.get_state_action_spaces()

# Provides information on Building type, Climate Zone, Annual DHW demand, Annual Cooling Demand, Annual Electricity Demand, Solar Capacity, and correlations among buildings
building_info = env.get_building_information()


In [7]:
# Build the A2C agent from the building metadata and the state/action spaces.
# Agent.__init__ also picks up the notebook-level `env`, so the environment
# cell must have been run before this one.
agents = Agent(building_info, observations_spaces, actions_spaces)

In [81]:
state = env.reset()

In [85]:
state

array([[ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ]],
      dtype=float32)

In [88]:
state[0]

array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32)

In [11]:
agents.observation_dim

AttributeError: 'Agent' object has no attribute 'observation_dim'

In [None]:
print(env.observation_spaces)

In [None]:
len(env.observation_spaces)

In [None]:
agents.train()

In [14]:
state

array([array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  2.36,  0.  ,
        0.  ], dtype=float32),
       array([ 1.  , 17.81, 25.29,  1.65,  0.  ], dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.46,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32)], dtype=object)

In [15]:
state.reshape

<function ndarray.reshape>

In [16]:
state

array([array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  2.36,  0.  ,
        0.  ], dtype=float32),
       array([ 1.  , 17.81, 25.29,  1.65,  0.  ], dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.46,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32)], dtype=object)

In [19]:
np.reshape(state, [1, agents.state_dim])


array([[array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  2.36,  0.  ,
        0.  ], dtype=float32),
        array([ 1.  , 17.81, 25.29,  1.65,  0.  ], dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.46,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32)]], dtype=object)

In [28]:
len(state[0][0])

TypeError: object of type 'numpy.float32' has no len()

In [22]:
sum(state,[])

ValueError: operands could not be broadcast together with shapes (0,) (9,) 

In [None]:
observations_spaces

In [None]:
agents.building_info

In [None]:
len(agents.building_info)

In [36]:
list_t = list([1,2])

In [56]:
import pprint

In [53]:
list_tt=np.array([list_t,list_t,list_t,list_t,list_t,list_t,list_t,list_t,list_t,list_t])

In [58]:
pprint.pprint(list_tt)

array([[1, 2],
       [1, 2],
       [1, 2],
       [1, 2],
       [1, 2],
       [1, 2],
       [1, 2],
       [1, 2],
       [1, 2],
       [1, 2]])


In [49]:
sum(list_tt,[])

ValueError: operands could not be broadcast together with shapes (0,) (2,) 

In [59]:
list_t=np.array([1,2])
list_tt=np.array([list_t,list_t])
list_ttt=np.array([list_tt])
list_ttt

array([[[1, 2],
        [1, 2]]])

In [62]:
list_ttt[0][0][0]

1

In [63]:
state

array([array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  2.36,  0.  ,
        0.  ], dtype=float32),
       array([ 1.  , 17.81, 25.29,  1.65,  0.  ], dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.46,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32)], dtype=object)

In [70]:
state[2]

array([ 1.  , 17.81, 25.29,  1.65,  0.  ], dtype=float32)

In [66]:
state[0][1]

17.81

In [69]:
state.reshape(3,)

ValueError: cannot reshape array of size 9 into shape (3,)

In [71]:
observations_spaces

[Box(9,), Box(10,), Box(5,), Box(9,), Box(9,), Box(9,), Box(9,), Box(9,), Box(9,)]

In [74]:
observations_spaces[0][0]

TypeError: 'Box' object is not subscriptable

In [77]:
b= box.Box({'data': 2, 'count': 5})


NameError: name 'box' is not defined

In [78]:
env.reset(project=False)

TypeError: reset() got an unexpected keyword argument 'project'

In [79]:
env

<citylearn.CityLearn object at 0x00000222CF47D808>

In [89]:
# NOTE(review): this rebinds the notebook-level `env` (assigned a CityLearn
# instance in the environment-setup cell) to a Gym environment; every cell that
# reads `env` afterwards sees whichever assignment ran last.
env_name = 'Pendulum-v0'
env = gym.make(env_name)

In [107]:
env.observation_spaces

[Box(9,), Box(10,), Box(5,), Box(9,), Box(9,), Box(9,), Box(9,), Box(9,), Box(9,)]

In [105]:
env.observation_space

Box(26,)

In [106]:
env.observation_space.shape[0]

26

In [101]:
env.action_space

Box(16,)

In [102]:
env.action_space.shape[0]

16

In [110]:
env.observations_spaces

AttributeError: 'CityLearn' object has no attribute 'observations_spaces'

In [128]:
env.action_spaces

[Box(2,), Box(2,), Box(1,), Box(1,), Box(2,), Box(2,), Box(2,), Box(2,), Box(2,)]

In [117]:
env.action_spaces[0]

Box(2,)

In [118]:
# One Actor per (observation space, action space) pair, keyed by position.
# Actor requires four positional arguments; action_bound and std_bound follow
# the same convention Agent.__init__ uses (action_space.high[0] and [1e-2, 1.0]).
actor = {}
for i, (o,a) in enumerate(zip(env.observation_spaces, env.action_spaces)):
    actor[i] = Actor(o.shape[0], a.shape[0], a.high[0], [1e-2, 1.0])

TypeError: zip argument #1 must support iteration

In [123]:
state

array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32)

In [122]:
# Print the index of every entry in `state`. The loop variable has its own
# name so that the notebook-level `state` is not overwritten by its last element.
for i, state_entry in enumerate(state):
    print(i)

0


In [125]:
5e-3

0.005