In [1]:
# Imports

import logging
import os
logging.getLogger("wandb").setLevel(logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.ERROR)
os.environ['WANDB_SILENT'] = 'true'
os.environ['WANDB_CONSOLE'] = 'off'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np


import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from tf_agents.agents.ddpg import ddpg_agent
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import tf_py_environment, py_environment, batched_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.utils import common
import matplotlib.pyplot as plt
import wandb

import sys
sys.path.insert(0, '..')
from environments.EnergyManagementEnv import EnergyManagementEnv
from utils.agentNetworks import ActorNetwork, CriticNetwork, CustomLayers
import utils.dataloader as DL





In [2]:
num_buildings = 30

# Load the combined energy dataset, index it by date and replace missing values with 0.
energy_data = (
    pd.read_csv("../../data/3final_data/Final_Energy_dataset.csv", header=0)
    .set_index('Date')
    .fillna(0)
)

# Row ranges (start inclusive, stop exclusive) of the three consecutive data splits.
split_rows = {"train": (0, 17520), "eval": (17520, 35088), "test": (35088, 52608)}

dataset = {split_name: {} for split_name in split_rows}
environments = {split_name: {} for split_name in split_rows}

# NOTE: the loop variable `idx` is intentionally kept at module level; later cells read it.
for idx in range(num_buildings):
    building_key = f"building_{idx+1}"

    # Per-building view: its own load and PV columns plus the shared price / fuel-mix columns.
    user_data = energy_data[[f'load_{idx+1}', f'pv_{idx+1}', 'price', 'fuelmix']]

    for split_name, (first_row, stop_row) in split_rows.items():
        # Re-index every split from 0 so each one is addressed by a plain integer range.
        split_frame = user_data[first_row:stop_row].set_index(pd.RangeIndex(0, stop_row - first_row))
        dataset[split_name][building_key] = split_frame

        # One TF-wrapped environment per building and split.
        environments[split_name][building_key] = tf_py_environment.TFPyEnvironment(
            EnergyManagementEnv(init_charge=0.0, data=split_frame)
        )

# Quick sanity report using building 1 as the representative environment.
reference_env = environments["train"]["building_1"]
print("Batch size: ", reference_env.batch_size)
print("State Space: {}, Action Space: {}".format(
    reference_env.observation_spec().shape[0],
    reference_env.action_spec().shape[0],
))  # SoE, price, price forecast 1-6
print("Upper bound: {}".format(round(reference_env.action_spec().maximum.item(), 3)))
dataset["test"]["building_1"].head(1)

Batch size:  1
State Space: 6, Action Space: 1
Upper bound: 2.3


Unnamed: 0,load_1,pv_1,price,fuelmix
0,1.149,0.0,0.05704,0.530991


# Federated Learning

In [3]:
# Number of federated aggregation rounds executed by the federated training loop.
federated_rounds = 3
# Number of repetitions per building in the evaluation loop.
# NOTE(review): the evaluation cell re-assigns this to 1 before using it, so this value is not what gets used there.
num_rounds = 2

In [4]:
# Cluster label per building, in file order (position 0 corresponds to building 1).
y = np.loadtxt('../../data/3final_data/Clusters_KMeans10_dtw.csv', delimiter=',').astype(int)
num_clusters = 10

# Map each cluster id to the 1-based ids of the buildings assigned to it.
cluster_buildings = {}
for cluster_number in range(num_clusters):
    member_positions = np.where(y == cluster_number)[0]
    buildings_in_cluster = member_positions + 1  # positions are 0-based, building ids are 1-based
    cluster_buildings[cluster_number] = buildings_in_cluster
cluster_buildings

{0: array([ 7, 14, 18, 22, 23, 25, 29], dtype=int64),
 1: array([6], dtype=int64),
 2: array([ 3,  4,  9, 13, 15, 19, 20, 30], dtype=int64),
 3: array([1], dtype=int64),
 4: array([21], dtype=int64),
 5: array([ 2, 28], dtype=int64),
 6: array([ 5, 10, 11, 12, 24, 26, 27], dtype=int64),
 7: array([8], dtype=int64),
 8: array([17], dtype=int64),
 9: array([16], dtype=int64)}

In [5]:
def avg_weights_with_noise_fedprox(weight_list, clip_threshold=None, noise_scale=0.001, proximal_term=0.1):
    """Aggregate several clients' weight lists into one noisy, shrunken average.

    For every layer position the client tensors are averaged element-wise,
    optionally clipped, perturbed with Gaussian noise and finally scaled down.

    Args:
        weight_list: Sequence with one entry per client; each entry is that
            client's list of layer weights (all clients in the same layer order).
        clip_threshold: If not None, the layer mean is clipped element-wise to
            [-clip_threshold, clip_threshold] before noise is added.
        noise_scale: Standard deviation of the zero-mean Gaussian noise added to
            each layer mean.
        proximal_term: Fraction of the noisy mean that is subtracted again.

    Returns:
        A list with one aggregated tensor per layer position.

    NOTE(review): the "proximal" step subtracts ``proximal_term * noisy_mean``
    from the noisy mean, i.e. it is a uniform shrink by ``(1 - proximal_term)``.
    No reference (global) weights enter the computation — confirm this is the
    intended behaviour.
    """
    def _aggregate_layer(client_layers):
        # Element-wise mean across clients for this layer position.
        aggregated = tf.math.reduce_mean(client_layers, axis=0)

        if clip_threshold is not None:
            aggregated = tf.clip_by_value(aggregated, -clip_threshold, clip_threshold)

        # Perturb the mean with zero-mean Gaussian noise.
        perturbed = aggregated + tf.random.normal(shape=aggregated.shape, mean=0.0, stddev=noise_scale)

        # "Proximal" correction: remove a fixed fraction of the noisy mean.
        shrink = -proximal_term * perturbed
        return perturbed + shrink

    # zip(*weight_list) regroups the per-client lists into per-layer tuples.
    return [_aggregate_layer(client_layers) for client_layers in zip(*weight_list)]

### Federated Training

In [6]:
from tf_agents.agents import ddpg

def get_ddpg_agent(observation_spec, action_spec, custom_layers, global_step, time_step_spec=None):
    """Build and initialize a DDPG agent for the given observation/action specs.

    The actor, critic and both target networks are TF-Agents' stock DDPG
    networks with two hidden layers of 32 ReLU units each.

    Args:
        observation_spec: Observation spec of the environment the agent acts in.
        action_spec: Action spec of the environment the agent acts in.
        custom_layers: Currently unused (the custom networks are not wired in);
            kept so existing callers keep working.
        global_step: Variable handed to the agent as its ``train_step_counter``.
        time_step_spec: Optional explicit time-step spec. When None it is derived
            from ``observation_spec`` with TF-Agents' default reward/discount
            specs. Pass ``env.time_step_spec()`` if the environment customises them.

    Returns:
        Tuple ``(tf_agent, eval_policy, collect_policy)`` where ``eval_policy`` is
        ``tf_agent.policy`` and ``collect_policy`` is ``tf_agent.collect_policy``.
    """
    # Local import so this function does not depend on an import in another cell.
    from tf_agents.trajectories import time_step as ts

    hidden_layers = (32, 32)

    def _build_actor():
        # Maps observations to actions.
        return ddpg.actor_network.ActorNetwork(
            input_tensor_spec=observation_spec,
            output_tensor_spec=action_spec,
            fc_layer_params=hidden_layers,
            activation_fn=tf.keras.activations.relu)

    def _build_critic():
        # Scores (observation, action) pairs.
        return ddpg.critic_network.CriticNetwork(
            input_tensor_spec=(observation_spec, action_spec),
            joint_fc_layer_params=hidden_layers,
            activation_fn=tf.keras.activations.relu)

    actor_net = _build_actor()
    critic_net = _build_critic()
    target_actor_network = _build_actor()
    target_critic_network = _build_critic()

    # Use the specs passed in by the caller. Previously these were read from
    # environments["train"][f"building_{idx+1}"], i.e. from whatever value a loop
    # variable of an earlier cell happened to hold.
    if time_step_spec is None:
        time_step_spec = ts.time_step_spec(observation_spec)

    agent_params = {
        "time_step_spec": time_step_spec,
        "action_spec": action_spec,
        "actor_network": actor_net,
        "critic_network": critic_net,
        "actor_optimizer": tf.compat.v1.train.AdamOptimizer(learning_rate=1e-3),
        "critic_optimizer": tf.compat.v1.train.AdamOptimizer(learning_rate=1e-2),
        "ou_stddev": 0.9,
        "ou_damping": 0.3,
        "target_actor_network": target_actor_network,
        "target_critic_network": target_critic_network,
        "target_update_tau": 0.05,
        "target_update_period": 5,
        "dqda_clipping": None,
        "td_errors_loss_fn": tf.compat.v1.losses.huber_loss,
        "gamma": 0.99,
        "reward_scale_factor": 1.0,
        "train_step_counter": global_step,
    }

    # Create the DdpgAgent with unpacked parameters
    tf_agent = ddpg_agent.DdpgAgent(**agent_params)

    tf_agent.initialize()
    eval_policy = tf_agent.policy
    collect_policy = tf_agent.collect_policy

    return tf_agent, eval_policy, collect_policy

In [7]:
def initialize_wandb_logging(project="DDPG_battery_testing", name="Exp", num_iterations=1500, batch_size=1, a_lr="1e-4", c_lr="1e-3"):
    """Log in to Weights & Biases, start a run and create a checkpoint artifact.

    Args:
        project: W&B project the run is logged to.
        name: Display name of the run.
        num_iterations: Logged in the run config as ``train_steps``.
        batch_size: Logged in the run config as ``batch_size``.
        a_lr: Actor learning rate to record (number or numeric string).
        c_lr: Critic learning rate to record (number or numeric string).

    Returns:
        A ``wandb.Artifact`` named ``'save'`` of type ``"checkpoint"``. Nothing is
        added to it here.

    Note:
        The values are only recorded in the run config; they do not configure the
        agent. Pass the learning rates the agent was actually built with so the
        logged config matches the experiment.
    """
    wandb.login()
    wandb.init(
        # Honour the caller's arguments; previously the project and both learning
        # rates were hard-coded and the parameters silently ignored.
        project=project,
        job_type="train_eval_test",
        name=name,
        config={
            "train_steps": num_iterations,
            "batch_size": batch_size,
            # float() keeps the logged values numeric even for the string defaults.
            "actor_learning_rate": float(a_lr),
            "critic_learning_rate": float(c_lr)}
    )
    artifact = wandb.Artifact(name='save', type="checkpoint")

    # No training checkpointer is set up here; the artifact is returned empty.
    return artifact

In [8]:
def setup_rl_training_pipeline(tf_agent, env_train, replay_buffer_capacity,collect_policy, initial_collect_steps, collect_steps_per_iteration):
    """Create replay buffer, collection drivers and the experience iterator.

    The replay buffer is pre-filled by running ``collect_policy`` for
    ``initial_collect_steps`` steps, after which the environment is reset.
    As a side effect ``tf_agent.train`` is replaced by a ``common.function``
    wrapped version.

    Args:
        tf_agent: Agent whose ``collect_data_spec`` defines the buffer layout.
        env_train: Training environment the drivers step through.
        replay_buffer_capacity: ``max_length`` of the replay buffer.
        collect_policy: Policy used by both drivers to gather experience.
        initial_collect_steps: Steps collected once, before training starts.
        collect_steps_per_iteration: Steps collected per call of the returned driver.

    Returns:
        Tuple ``(iterator, collect_driver, time_step, policy_state)``: an iterator
        over sampled 2-step experience batches, the per-iteration collect driver,
        the time step after resetting the environment and the collect policy's
        initial state.
    """
    env_batch_size = env_train.batch_size

    # Uniform replay buffer: every stored sample has the same chance of being drawn.
    experience_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
        data_spec=tf_agent.collect_data_spec,
        batch_size=env_batch_size,
        max_length=replay_buffer_capacity,
    )

    def _make_driver(step_count):
        # Driver that writes `step_count` environment steps into the buffer per run;
        # `run` is wrapped with common.function for better performance.
        driver = dynamic_step_driver.DynamicStepDriver(
            env=env_train,
            policy=collect_policy,
            observers=[experience_buffer.add_batch],
            num_steps=step_count,
        )
        driver.run = common.function(driver.run)
        return driver

    # One driver for the one-off warm-up, one for continuous collection during training.
    warmup_driver = _make_driver(initial_collect_steps)
    collect_driver = _make_driver(collect_steps_per_iteration)
    tf_agent.train = common.function(tf_agent.train)

    # Fill the buffer with initial experience, then start from a fresh episode.
    warmup_driver.run()
    time_step = env_train.reset()
    policy_state = collect_policy.get_initial_state(env_batch_size)

    # Sample pairs of adjacent steps (num_steps=2) as mini-batches and prefetch them.
    experience_dataset = experience_buffer.as_dataset(
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
        sample_batch_size=env_batch_size,
        num_steps=2,
    ).prefetch(tf.data.experimental.AUTOTUNE)

    return iter(experience_dataset), collect_driver, time_step, policy_state

In [9]:
# Reset TensorFlow's default graph before any agents are built.
# NOTE(review): presumably meant to clear leftover state (e.g. a previously created
# global step) when the notebook is re-run in the same kernel — confirm it is needed.
tf.compat.v1.reset_default_graph()

In [10]:
# Setup Agent networks
batch_size = 1
replay_buffer_capacity = 20000 #Before: 1000000 -> But only <18.000 samples per dataset
initial_collect_steps = 2000
collect_steps_per_iteration = 20 #2000
num_iterations = 500 #10000
eval_interval = 4000 #3000

# Current global (aggregated) weights, keyed by network name and then by cluster id.
global_weights = {"actor_net": {}, "critic_net": {}, "target_actor_network": {}, "target_critic_network": {}}

# Initialize a global model for each cluster of similar buildings
for cluster in range(num_clusters):

    # 1. Build global agent per cluster
    global_step = tf.compat.v1.train.get_or_create_global_step()
    first_building_in_cluster = cluster_buildings[cluster][0]

    global_tf_agent, global_eval_policy, global_collect_policy = get_ddpg_agent(
            observation_spec = environments["train"][f"building_{first_building_in_cluster}"].observation_spec(),
            action_spec = environments["train"][f"building_{first_building_in_cluster}"].action_spec(),
            custom_layers = [CustomLayers.get_dense_layers(layers=1, units=32)],
            global_step = global_step
            )

    # 2. Initially store weights
    global_weights["actor_net"][cluster] = global_tf_agent._actor_network.get_weights()
    global_weights["critic_net"][cluster] = global_tf_agent._critic_network.get_weights()
    global_weights["target_actor_network"][cluster] = global_tf_agent._target_actor_network.get_weights()
    global_weights["target_critic_network"][cluster] = global_tf_agent._target_critic_network.get_weights()

# Start Federated Learning - For each federated round
for federated_round in range(federated_rounds):

    # Iterate through each cluster
    for cluster_number, buildings_in_cluster in cluster_buildings.items():
        print(f"Cluster {cluster_number}: Started Federated training round ----------", federated_round+1, f"/ {federated_rounds}")

        local_actor_weight_list = list()
        local_critic_weight_list = list()
        local_target_actor_weight_list = list()
        local_target_critic_weight_list = list()

        # Iterate through the buildings per cluster
        for building_index in buildings_in_cluster:

            #0. Reset global step so every local agent trains for num_iterations steps
            tf.compat.v1.reset_default_graph()
            global_step = tf.compat.v1.train.get_or_create_global_step()

            #1. Initialize local agent and set global weights
            local_tf_agent, local_eval_policy, local_collect_policy = get_ddpg_agent(
                observation_spec = environments["train"][f"building_{building_index}"].observation_spec(),
                action_spec = environments["train"][f"building_{building_index}"].action_spec(),
                custom_layers = [CustomLayers.get_dense_layers(layers=1, units=32)],
                global_step = global_step
                )
            # Start from THIS cluster's global model. The stale loop variable `cluster`
            # from the initialisation loop (always the last cluster) was used here before.
            local_tf_agent._actor_network.set_weights(global_weights["actor_net"][cluster_number])
            local_tf_agent._critic_network.set_weights(global_weights["critic_net"][cluster_number])
            local_tf_agent._target_actor_network.set_weights(global_weights["target_actor_network"][cluster_number])
            local_tf_agent._target_critic_network.set_weights(global_weights["target_critic_network"][cluster_number])

            #2. Prepare training pipeline: Setup iterator, replay buffer, driver
            local_iterator, local_collect_driver, local_time_step, local_policy_state = setup_rl_training_pipeline(
                local_tf_agent, environments["train"][f"building_{building_index}"], replay_buffer_capacity, local_collect_policy, initial_collect_steps, collect_steps_per_iteration
                )

            #3. Setup wandb logging (disabled during federated training)
            #artifact = initialize_wandb_logging(name=f"Exp_building{building_index}_rd{federated_round+1}", num_iterations=num_iterations)

            #4. Start training
            eval_metrics = [tf_metrics.AverageReturnMetric(batch_size=batch_size)]
            test_metrics = [tf_metrics.AverageReturnMetric(batch_size=batch_size)]

            # The agent uses global_step as its train step counter, so this loop
            # runs until num_iterations training steps have been taken.
            while global_step.numpy() < num_iterations:

                local_time_step, local_policy_state = local_collect_driver.run(time_step=local_time_step, policy_state=local_policy_state)
                local_experience, _ = next(local_iterator)
                local_train_loss = local_tf_agent.train(local_experience)

                metrics = {}
                # NOTE: with the settings above eval_interval exceeds num_iterations,
                # so this periodic evaluation never triggers.
                if global_step.numpy() % eval_interval == 0:
                    #train_checkpointer.save(global_step)
                    metrics = metric_utils.eager_compute(eval_metrics,environments["eval"][f"building_{building_index}"],
                        local_eval_policy,num_episodes=1,train_step=global_step,summary_writer=None,summary_prefix='',use_function=True)

                #if global_step.numpy() % 2 == 0:
                #    metrics["loss"] = local_train_loss.loss
                #    wandb.log(metrics)

            #5. Add local agent weights to list
            local_actor_weight_list.append(local_tf_agent._actor_network.get_weights())
            local_critic_weight_list.append(local_tf_agent._critic_network.get_weights())
            local_target_actor_weight_list.append(local_tf_agent._target_actor_network.get_weights())
            local_target_critic_weight_list.append(local_tf_agent._target_critic_network.get_weights())

        # Perform federated aggregation
        average_actor_weights = avg_weights_with_noise_fedprox(local_actor_weight_list)
        average_critic_weights = avg_weights_with_noise_fedprox(local_critic_weight_list)
        average_target_actor_weights = avg_weights_with_noise_fedprox(local_target_actor_weight_list)
        average_target_critic_weights = avg_weights_with_noise_fedprox(local_target_critic_weight_list)

        # global_tf_agent serves as a holder that turns the aggregated tensors into
        # plain weight arrays (all clusters share the same network architecture).
        global_tf_agent._actor_network.set_weights(average_actor_weights)
        global_tf_agent._critic_network.set_weights(average_critic_weights)
        global_tf_agent._target_actor_network.set_weights(average_target_actor_weights)
        global_tf_agent._target_critic_network.set_weights(average_target_critic_weights)

        # Write the aggregate back so the next federated round starts from it.
        # Without this every round restarted from the initial weights.
        global_weights["actor_net"][cluster_number] = global_tf_agent._actor_network.get_weights()
        global_weights["critic_net"][cluster_number] = global_tf_agent._critic_network.get_weights()
        global_weights["target_actor_network"][cluster_number] = global_tf_agent._target_actor_network.get_weights()
        global_weights["target_critic_network"][cluster_number] = global_tf_agent._target_critic_network.get_weights()

        # Persist this cluster's global model for the current round.
        # NOTE(review): the leading "/" makes this an absolute path at the filesystem
        # (or current drive) root. The former os.path.join(os.getcwd(), "/models/...")
        # resolved to the same location because an absolute second component discards
        # the first. The location is kept unchanged so existing readers still find the files.
        save_dir = f"/models/cluster_{cluster_number}/FLround{federated_round+1}"
        os.makedirs(save_dir, exist_ok=True)

        np.savez_compressed(os.path.join(save_dir, "actor_weights.npz"), *global_weights["actor_net"][cluster_number])
        np.savez_compressed(os.path.join(save_dir, "critic_weights.npz"), *global_weights["critic_net"][cluster_number])
        np.savez_compressed(os.path.join(save_dir, "target_actor_weights.npz"), *global_weights["target_actor_network"][cluster_number])
        np.savez_compressed(os.path.join(save_dir, "target_critic_weights.npz"), *global_weights["target_critic_network"][cluster_number])

Cluster 0: Started Federated training round ---------- 1 / 3
Cluster 1: Started Federated training round ---------- 1 / 3
Cluster 2: Started Federated training round ---------- 1 / 3
Cluster 3: Started Federated training round ---------- 1 / 3
Cluster 4: Started Federated training round ---------- 1 / 3
Cluster 5: Started Federated training round ---------- 1 / 3
Cluster 6: Started Federated training round ---------- 1 / 3
Cluster 7: Started Federated training round ---------- 1 / 3
Cluster 8: Started Federated training round ---------- 1 / 3
Cluster 9: Started Federated training round ---------- 1 / 3
Cluster 0: Started Federated training round ---------- 2 / 3
Cluster 1: Started Federated training round ---------- 2 / 3
Cluster 2: Started Federated training round ---------- 2 / 3
Cluster 3: Started Federated training round ---------- 2 / 3
Cluster 4: Started Federated training round ---------- 2 / 3
Cluster 5: Started Federated training round ---------- 2 / 3
Cluster 6: Started Feder

In [16]:
num_rounds=1

# Federated round whose saved global weights are evaluated: the final one.
final_fl_round = federated_rounds


def load_weight_list(npz_path):
    """Return the arrays stored in an ``.npz`` archive as a list, in saved order.

    The archive is expected to hold positional arrays ``arr_0 .. arr_{n-1}``, as
    written by ``np.savez_compressed(path, *weights)``. The file handle is closed
    before returning. Pickle loading stays disabled (NumPy's default) because the
    archives only contain plain numeric arrays.
    """
    with np.load(npz_path) as archive:
        return [archive[f'arr_{i}'] for i in range(len(archive.files))]


for cluster_number, buildings_in_cluster in cluster_buildings.items():
    print(f"Cluster {cluster_number}:")

    # Directory the training cell saved this cluster's global weights to. The same
    # root-relative form as the writer is used instead of a hard-coded "C:\models"
    # (which contained an invalid "\m" escape and only matched on drive C:).
    weights_dir = f"/models/cluster_{cluster_number}/FLround{final_fl_round}"

    for building_index in buildings_in_cluster:

        for round_idx in range(num_rounds):
            print("Building: ", building_index, " - round: ", round_idx)

            train_env = environments["train"][f"building_{building_index}"]
            test_env = environments["test"][f"building_{building_index}"]

            # Build an agent for THIS building (previously the specs were taken from
            # the leaked loop variable `idx`) with its own step counter.
            global_step = tf.compat.v1.train.get_or_create_global_step()
            tf_agent, eval_policy, _ = get_ddpg_agent(
                observation_spec=train_env.observation_spec(),
                action_spec=train_env.action_spec(),
                custom_layers=[CustomLayers.get_dense_layers(layers=1, units=32)],
                global_step=global_step,
            )

            # Load the cluster's global weights AFTER the agent has been initialized
            # (get_ddpg_agent calls initialize()), so initialization cannot overwrite
            # the loaded target-network weights.
            tf_agent._actor_network.set_weights(load_weight_list(f"{weights_dir}/actor_weights.npz"))
            tf_agent._critic_network.set_weights(load_weight_list(f"{weights_dir}/critic_weights.npz"))
            tf_agent._target_actor_network.set_weights(load_weight_list(f"{weights_dir}/target_actor_weights.npz"))
            tf_agent._target_critic_network.set_weights(load_weight_list(f"{weights_dir}/target_critic_weights.npz"))

            print("Start testing ...")
            artifact = initialize_wandb_logging(name=f"Exp_building{building_index}_rd{final_fl_round}", num_iterations=num_iterations)

            # Fresh metric per evaluation instead of reusing one left over from training.
            test_metrics = [tf_metrics.AverageReturnMetric(batch_size=test_env.batch_size)]
            metrics = metric_utils.eager_compute(
                test_metrics,
                test_env,
                eval_policy,
                num_episodes=1
                )
            wandb.log(metrics)
            #artifact.add_dir(local_path='checkpoints/ddpg/')
            wandb.log_artifact(artifact)
            wandb.finish()
            tf.compat.v1.reset_default_graph()

Cluster 0:
Building:  7  - round:  0
Start testing ...
Building:  14  - round:  0
Start testing ...
Building:  18  - round:  0
Start testing ...
Building:  22  - round:  0
Start testing ...
Building:  23  - round:  0
Start testing ...
Building:  25  - round:  0
Start testing ...
Problem at: C:\Users\rs1044\AppData\Local\Temp\ipykernel_26500\2961659840.py 3 initialize_wandb_logging


KeyboardInterrupt: 