In [5]:
from citylearn import  CityLearn
from pathlib import Path
from agent import RL_Agents
import numpy as np

import tensorflow as tf
import gym
import argparse
from collections import deque
import random
from tensorflow.keras.layers import Input, Dense, Flatten, Lambda
from tensorflow.keras.optimizers import Adam

In [63]:
# Select the climate zone and load environment
climate_zone = 1
data_path = Path("data/Climate_Zone_"+str(climate_zone))
building_attributes = data_path / 'building_attributes.json'
weather_file = data_path / 'weather_data.csv'
solar_profile = data_path / 'solar_generation_1kW.csv'
building_state_actions = 'buildings_state_action_space.json'
building_ids = ["Building_1","Building_2","Building_3","Building_4","Building_5","Building_6","Building_7","Building_8","Building_9"]
objective_function = ['ramping','1-load_factor','average_daily_peak','peak_demand','net_electricity_consumption']

env = CityLearn(data_path, building_attributes, weather_file, solar_profile, building_ids, buildings_states_actions = building_state_actions, cost_function = objective_function)
observations_spaces, actions_spaces = env.get_state_action_spaces()

# Provides information on Building type, Climate Zone, Annual DHW demand, Annual Cooling Demand, Annual Electricity Demand, Solar Capacity, and correllations among buildings
building_info = env.get_building_information()




In [25]:

tf.keras.backend.set_floatx('float64')

parser = argparse.ArgumentParser()
parser.add_argument('--gamma', type=float, default=0.95)
parser.add_argument('--lr', type=float, default=0.005)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--eps', type=float, default=1.0)
parser.add_argument('--eps_decay', type=float, default=0.995)
parser.add_argument('--eps_min', type=float, default=0.01)

args = parser.parse_args()


usage: ipykernel_launcher.py [-h] [--gamma GAMMA] [--lr LR]
                             [--batch_size BATCH_SIZE] [--eps EPS]
                             [--eps_decay EPS_DECAY] [--eps_min EPS_MIN]
ipykernel_launcher.py: error: unrecognized arguments: -f C:\Users\ailab6\AppData\Roaming\jupyter\runtime\kernel-8b95a3d6-abe2-40ca-9727-562a7b60c4c9.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [26]:
gamma = 0.95
lr = 0.005
batch_size = 32
eps = 1.0
eps_decay = 0.995
eps_min = 0.01

In [27]:
class ReplayBuffer:
    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)
    
    def put(self, state, action, reward, next_state, done):
        self.buffer.append([state, action, reward, next_state, done])
    
    def sample(self):
        sample = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, done = map(np.asarray, zip(*sample))
        states = np.array(states).reshape(batch_size, -1)
        next_states = np.array(next_states).reshape(batch_size, -1)
        return states, actions, rewards, next_states, done
    
    def size(self):
        return len(self.buffer)

In [28]:
class ActionStateModel:
    def __init__(self, state_dim, aciton_dim):
        self.state_dim  = state_dim
        self.action_dim = aciton_dim
        self.epsilon = eps
        
        self.model = self.create_model()
    
    def create_model(self):
        model = tf.keras.Sequential([
            Input((self.state_dim,)),
            Dense(32, activation='relu'),
            Dense(16, activation='relu'),
            Dense(self.action_dim)
        ])
        model.compile(loss='mse', optimizer=Adam(lr))
        return model
    
    def predict(self, state):
        return self.model.predict(state)
    
    def get_action(self, state):
        state = np.reshape(state, [1, self.state_dim])
        self.epsilon *= eps_decay
        self.epsilon = max(self.epsilon, eps_min)
        q_value = self.predict(state)[0]
        if np.random.random() < self.epsilon:
            return random.randint(0, self.action_dim-1)
        return np.argmax(q_value)

    def train(self, states, targets):
        self.model.fit(states, targets, epochs=1, verbose=0)

In [49]:
class Agent:
    def __init__(self, building_info, observation_spaces = None, action_spaces = None):
        self.env = env
        self.state_dim = len(observation_spaces)
        self.action_dim = len(action_spaces)

        self.model = ActionStateModel(self.state_dim, self.action_dim)
        self.target_model = ActionStateModel(self.state_dim, self.action_dim)
        self.target_update()

        self.buffer = ReplayBuffer()
        
        # Parameters
        self.device = "cuda:0"
        self.time_step = 0
        self.building_info = building_info # Can be used to create different RL agents based on basic building attributes or climate zones
        self.observation_spaces = observation_spaces
        self.action_spaces = action_spaces
        self.n_buildings = len(observation_spaces)
        self.networks_initialized = False
        
    def target_update(self):
        weights = self.model.model.get_weights()
        self.target_model.model.set_weights(weights)
    
    def replay(self):
        for _ in range(10):
            states, actions, rewards, next_states, done = self.buffer.sample()
            targets = self.target_model.predict(states)
            next_q_values = self.target_model.predict(next_states).max(axis=1)
            targets[range(batch_size), actions] = rewards + (1-done) * next_q_values * gamma
            self.model.train(states, targets)
    
    def train(self, max_episodes=1000):
        for ep in range(max_episodes):
            done, total_reward = False, 0
            state = self.env.reset()
            while not done:
                action = self.model.get_action(state)
                next_state, reward, done, _ = self.env.step(action)
                self.buffer.put(state, action, reward*0.01, next_state, done)
                total_reward += reward
                state = next_state
            if self.buffer.size() >= batch_size:
                self.replay()
            self.target_update()
            print('EP{} EpisodeReward={}'.format(ep, total_reward))

In [50]:
# Select many episodes for training. In the final run we will set this value to 1 (the buildings run for one year)
episodes = 10

k, c = 0, 0
cost, cum_reward = {}, {}

In [51]:
# RL CONTROLLER
#Instantiating the control agent(s)
agents = Agent(building_info, observations_spaces, actions_spaces)

In [52]:
# The number of episodes can be replaces by a stopping criterion (i.e. convergence of the average reward)
for e in range(1):     
    cum_reward[e] = 0
    rewards = []
    state = env.reset()
    done = False
    while not done:
        if k%(1000)==0:
            print('hour: '+str(k)+' of '+str(8760*episodes))
            
        action = agents.model.get_action(state)
        next_state, reward, done, _ = env.step(action)
        agents.buffer.put(state, action, reward, next_state, done)
        state = next_state
        
        cum_reward[e] += reward[0]
        rewards.append(reward)
        k+=1
        
    cost[e] = env.cost()
    if c%20==0:
        print(cost[e])
    c+=1

hour: 0 of 87600


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

In [44]:
len(observations_spaces)

9

In [45]:
len(actions_spaces)

9

In [53]:
state

array([array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  2.36,  0.  ,
        0.  ], dtype=float32),
       array([ 1.  , 17.81, 25.29,  1.65,  0.  ], dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.46,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
       array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32)], dtype=object)

In [None]:
state = np.reshape(state, [1, self.state_dim])
        self.epsilon *= eps_decay
        self.epsilon = max(self.epsilon, eps_min)
        q_value = self.predict(state)[0]

In [55]:
state = np.reshape(state, [1, 9])
      

In [56]:
state

array([[array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  2.36,  0.  ,
        0.  ], dtype=float32),
        array([ 1.  , 17.81, 25.29,  1.65,  0.  ], dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.46,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32),
        array([ 1.  , 17.81, 25.29,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
      dtype=float32)]], dtype=object)

In [58]:
q=ActionStateModel.predict(state)

TypeError: predict() missing 1 required positional argument: 'state'

In [60]:
action = agents.model.get_action(state)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

In [61]:
e=gym.make('CartPole-v1')

In [62]:
agent=Agent(e)

TypeError: object of type 'NoneType' has no len()

In [64]:
env

<citylearn.CityLearn at 0x1f5979d2cc8>

In [65]:
env.observation_space

Box(26,)