In [1]:
# Imports
import os
import logging
import pandas as pd
import wandb
import tensorflow as tf

logging.getLogger("wandb").setLevel(logging.ERROR)
os.environ['WANDB_SILENT'] = 'true'
os.environ['WANDB_CONSOLE'] = 'off'

import sys
sys.path.insert(0, '..')
from utils.reinforcementLearningHelper import *







In [2]:
#Setup Environments of selected buildings for training, evaluation, and testing

environments, observation_spec, action_spec  = setup_energymanagement_environments(num_buildings=30)

#Check environment setup
print(
    "Batch size:", environments["train"][f"building_1"].batch_size, 
    "/ State Space: {} / Action Space: {}".format(observation_spec.shape[0], action_spec.shape[0]),
    "/ Upper bound: {}".format(round(environments["train"][f"building_1"].action_spec().maximum.item(), 3)),
)

Batch size: 1 / State Space: 6 / Action Space: 1 / Upper bound: 2.3


In [3]:
# Setup Agent networks
SEED = 42
tf.random.set_seed(SEED)

batch_size = 128
replay_buffer_capacity = 20000 
initial_collect_steps = 2000
collect_steps_per_iteration = 20 
num_iterations = 10000 
eval_interval = 9990

num_rounds = 1
num_buildings = 30

In [4]:
observation_spec.shape[0]

6

In [5]:
import tensorflow as tf
from tf_agents.networks import network
from tf_agents.networks import encoding_network
from tf_agents.utils import common as common_utils
from tf_agents.networks import utils
from tf_agents.utils import nest_utils

class ActorNetwork(network.Network):

    def __init__(self, observation_spec,action_spec, custom_layers=None, dropout_rate=0.2, name='CustomActorNetwork'):
        
        super(ActorNetwork, self).__init__(input_tensor_spec=observation_spec, state_spec=(), name=name)

        self._action_spec = action_spec
    
        # Initialize the custom Keras layers
        if custom_layers is not None:
            self._custom_layers = custom_layers
        else:
            self._custom_layers = [
                tf.keras.layers.Reshape((observation_spec.shape[0], -1)),  # Adjust as per your actual sequence length and features
                tf.keras.layers.LSTM(8, return_sequences=True),
                tf.keras.layers.LSTM(8),
                tf.keras.layers.Dropout(dropout_rate),
                tf.keras.layers.Dense(64, activation='relu'),
                tf.keras.layers.BatchNormalization(),
                tf.keras.layers.Dropout(dropout_rate),
            ]
            
            #[
            #    tf.keras.layers.Reshape((observation_spec.shape[0], -1)),
            #    tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),
            #    tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),
            #    tf.keras.layers.BatchNormalization(),
            #    tf.keras.layers.Dropout(dropout_rate),
            #    tf.keras.layers.Flatten(),  # Flatten the output before passing it to dense layers
            #    tf.keras.layers.Dense(256, activation='relu'),
            #    tf.keras.layers.BatchNormalization(),
            #    tf.keras.layers.Dropout(dropout_rate),
            #]
            
            #[
            #    tf.keras.layers.Dense(256, activation='relu'),
            #    tf.keras.layers.BatchNormalization(),
            #    tf.keras.layers.Dropout(dropout_rate),
            #    tf.keras.layers.Dense(256, activation='relu'),
            #    tf.keras.layers.BatchNormalization(),
            #    tf.keras.layers.Dropout(dropout_rate),
            #]
            
        # Initialize Output layer -> output_dim = action_spec
        self._action_layer = tf.keras.layers.Dense(units= action_spec.shape[0], activation='tanh')


    def call(self, observations, step_type=(), network_state=()):
        
        #Preprocess Input
        outer_rank = nest_utils.get_outer_rank(observations, self.input_tensor_spec)
        batch_squash = utils.BatchSquash(outer_rank)
        observations = tf.nest.map_structure(batch_squash.flatten, observations)
        observations_flat = tf.nest.flatten(observations)

        # Custom Layers
        state = tf.concat(observations_flat, axis=-1)
        for layer in self._custom_layers:
            state = layer(state)
        
        #Output layer
        actions = self._action_layer(state)
        actions = common_utils.scale_to_spec(actions, self._action_spec)
        actions = batch_squash.unflatten(actions)

        return tf.nest.pack_sequence_as(self._action_spec, [actions]), network_state


class CriticNetwork(network.Network):

    def __init__(self, observation_spec, action_spec, custom_layers=None, dropout_rate=0.2, name='CustomCriticNetwork'):
        
        super(CriticNetwork, self).__init__(input_tensor_spec=(observation_spec, action_spec), state_spec=(), name=name)

        self._obs_spec = observation_spec
        self._action_spec = action_spec
                
        # Encoding layer concatenates state and action inputs, adds dense layer:
        kernel_initializer = tf.keras.initializers.VarianceScaling(scale=1./3., mode='fan_in', distribution='uniform')
        combiner = tf.keras.layers.Concatenate(axis=-1)
        self._encoder = encoding_network.EncodingNetwork(
            (observation_spec, action_spec),
            fc_layer_params=(64,),
            preprocessing_combiner = combiner,
            activation_fn = tf.keras.activations.relu,
            kernel_initializer = kernel_initializer,
            batch_squash=True)

        # Initialize the custom tf layers here:
        if custom_layers is not None:
            self._custom_layers = custom_layers
        else:
            self._custom_layers = [
                tf.keras.layers.Dense(256, activation='relu'),
                tf.keras.layers.BatchNormalization(),
                tf.keras.layers.Dropout(dropout_rate),
                tf.keras.layers.Dense(256, activation='relu'),
                tf.keras.layers.BatchNormalization(),
                tf.keras.layers.Dropout(dropout_rate),
            ]
     
        # Initialize the value layer -> output_dim = 1 (Q-Value)
        self._value_layer = tf.keras.layers.Dense(
            units= 1,
            activation=tf.keras.activations.linear,
            kernel_initializer=tf.keras.initializers.RandomUniform(minval=-0.003, maxval=0.003),
            name='Value')  # Q-function output


    def call(self, observations, step_type=(), network_state=()):
        # Forward pass through the custom tf layers here (defined above):
        state, network_state = self._encoder(observations, step_type=step_type, network_state=network_state)
                          
        # Apply custom layers
        for layer in self._custom_layers:
            state = layer(state)
        
        value = self._value_layer(state)
    
        return tf.reshape(value, [-1]), network_state

In [6]:
from tf_agents.specs import tensor_spec

observation_spec = tensor_spec.TensorSpec([6], tf.float32)
action_spec = tensor_spec.BoundedTensorSpec([1], tf.float32, -1, 1)
dummy_obs = tf.random.uniform([1, 6])

actor_network = ActorNetwork(observation_spec, action_spec)
# Create a dummy step_type and network_state to match the call signature
dummy_step_type = tf.constant([0])
dummy_network_state = ()

# Call the network with dummy data to initialize weights
_ = actor_network(dummy_obs, dummy_step_type, dummy_network_state)

# Now, you should be able to get and set weights
weights = actor_network.get_weights()
print("Weights initialized:", weights[0][0][0])


Weights initialized: -0.40083313


In [7]:
from tf_agents.specs import tensor_spec

# Assuming CriticNetwork is defined somewhere else as per your setup
critic_network = CriticNetwork(observation_spec, action_spec)

observation_spec = tensor_spec.TensorSpec([6], tf.float32)
action_spec = tensor_spec.BoundedTensorSpec([1], tf.float32, -1, 1)
dummy_obs = tf.random.uniform([1, 6])
dummy_action = tf.random.uniform([1, 1], minval=-1, maxval=1)

# Adjusted to fit the critic network call signature which typically takes both observations and actions
_ = critic_network((dummy_obs, dummy_action), dummy_step_type, dummy_network_state)

weights_c = critic_network.get_weights()
print("Weights initialized:", weights_c[0][0][0])

Weights initialized: 0.37675747


In [8]:
def initialize_custom_ddpg_agent(observation_spec, action_spec, global_step, environments): 
    
    actor_network = ActorNetwork(observation_spec, action_spec)
    critic_network = CriticNetwork(observation_spec, action_spec)
    target_actor_network = ActorNetwork(observation_spec, action_spec)
    target_critic_network = CriticNetwork(observation_spec, action_spec)
    
    agent_params = {
        "time_step_spec": environments["train"][f"building_{1}"].time_step_spec(),
        "action_spec": environments["train"][f"building_{1}"].action_spec(),
        "actor_network": actor_network,
        "critic_network": critic_network,
        "actor_optimizer": tf.compat.v1.train.AdamOptimizer(learning_rate=1e-3), #1e-3
        "critic_optimizer": tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4), #1e-2
        "ou_stddev": 0.9, #0.9,
        "ou_damping": 0.15,
        "target_actor_network": target_actor_network,
        "target_critic_network": target_critic_network,
        "target_update_tau": 0.05,
        "target_update_period": 100, #5,
        "dqda_clipping": 0.5,
        "td_errors_loss_fn": tf.compat.v1.losses.huber_loss,
        "gamma": 1, #0.99,
        "reward_scale_factor": 1,
        "train_step_counter": global_step,
    }

    # Create the DdpgAgent with unpacked parameters
    ddpg_tf_agent = ddpg_agent.DdpgAgent(**agent_params)

    ddpg_tf_agent.initialize()
    eval_policy = ddpg_tf_agent.policy
    collect_policy = ddpg_tf_agent.collect_policy

    return ddpg_tf_agent, eval_policy, collect_policy

In [9]:
# LOCAL LEARNING

result_df = pd.DataFrame(columns=['Building', 'Total Profit'])

for idx in range(num_buildings):
    for round in range(num_rounds):
        building_index=idx+1
        print("Building: ", building_index, " - round: ", round)
        #0. Reset global step
        tf.compat.v1.reset_default_graph()
        global_step = tf.compat.v1.train.get_or_create_global_step()
        
        #1. Initalize agent
        tf_agent, eval_policy, collect_policy = initialize_custom_ddpg_agent(observation_spec, action_spec, global_step, environments)

        #2. Prepare training pipeline: Setup iterator, replay buffer, driver
        iterator, collect_driver, time_step, policy_state = setup_rl_training_pipeline(
            tf_agent, environments["train"][f"building_{building_index}"], replay_buffer_capacity,collect_policy, 
            initial_collect_steps, collect_steps_per_iteration, batch_size
            )

        #3. Setup wandb logging
        artifact = initialize_wandb_logging(name=f"Exp_DDPG_LL_Home{building_index}_rd{round}", num_iterations=num_iterations)

        #4. Train, evaluate agent and store weights
        result_df, metrics = agent_training_and_evaluation(global_step, num_iterations, collect_driver, 
                time_step, policy_state, iterator, tf_agent, eval_policy, building_index, result_df, eval_interval, environments)
            
        #5. End and log wandb
        end_and_log_wandb(metrics, artifact)

Building:  1  - round:  0
Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.


  result_df = pd.concat([result_df, pd.DataFrame({'Building': [building_index], 'Total Profit': [wandb.summary["Final Profit"]]})], ignore_index=True)


Building:  1  - Total Profit:  162.54106470999923
Building:  2  - round:  0
Building:  2  - Total Profit:  -24.31308116627295
Building:  3  - round:  0
Building:  3  - Total Profit:  172.84293097528013
Building:  4  - round:  0
Building:  4  - Total Profit:  9.961887182752822
Building:  5  - round:  0


KeyboardInterrupt: 