In [1]:
# Imports

import logging
import os
logging.getLogger("wandb").setLevel(logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.ERROR)
os.environ['WANDB_SILENT'] = 'true'
os.environ['WANDB_CONSOLE'] = 'off'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np


import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from tf_agents.agents.ddpg import ddpg_agent
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import tf_py_environment, py_environment, batched_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.utils import common
import matplotlib.pyplot as plt
import wandb

import sys
sys.path.insert(0, '..')
from environments.EnergyManagementEnv import EnergyManagementEnv
from utils.agentNetworks import ActorNetwork, CriticNetwork, CustomLayers
import utils.dataloader as DL





In [2]:
num_buildings = 30
energy_data = pd.read_csv("../../data/3final_data/Final_Energy_dataset.csv", header=0)
energy_data.set_index('Date', inplace=True)
energy_data.fillna(0, inplace=True)

dataset = {"train": {}, "eval": {}, "test": {}}
environments = {"train": {}, "eval": {}, "test": {}}
for idx in range(num_buildings):
    user_data = energy_data[[f'load_{idx+1}', f'pv_{idx+1}', 'price', 'fuelmix']]
    
    dataset["train"][f"building_{idx+1}"] = user_data[0:17520].set_index(pd.RangeIndex(0,17520))
    dataset["eval"][f"building_{idx+1}"] = user_data[17520:35088].set_index(pd.RangeIndex(0,17568))
    dataset["test"][f"building_{idx+1}"] = user_data[35088:52608].set_index(pd.RangeIndex(0,17520))

    environments["train"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["train"][f"building_{idx+1}"]))
    environments["eval"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["eval"][f"building_{idx+1}"]))
    environments["test"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["test"][f"building_{idx+1}"]))

print("Batch size: ", environments["train"][f"building_1"].batch_size)
print("State Space: {}, Action Space: {}".format(environments["train"][f"building_1"].observation_spec().shape[0], environments["train"][f"building_1"].action_spec().shape[0])) #SoE, price, price forecast 1-6
print("Upper bound: {}".format(round(environments["train"][f"building_1"].action_spec().maximum.item(), 3)))
dataset["test"][f"building_1"].head(1)

Batch size:  1
State Space: 6, Action Space: 1
Upper bound: 2.3


Unnamed: 0,load_1,pv_1,price,fuelmix
0,1.149,0.0,0.05704,0.530991


# Federated Learning

In [3]:
federated_rounds = 3
num_rounds = 5

In [4]:
y = np.loadtxt(f'../../data/3final_data/Clusters_KMeans10_dtw.csv', delimiter=',').astype(int)
num_clusters = 10
cluster_users = {i: [] for i in range(num_clusters)}

# Iterate through each cluster
for cluster_number in range(num_clusters):
    users_in_cluster = np.where(y == cluster_number)[0] +1
    cluster_users[cluster_number] = users_in_cluster
cluster_users

{0: array([ 7, 14, 18, 22, 23, 25, 29], dtype=int64),
 1: array([6], dtype=int64),
 2: array([ 3,  4,  9, 13, 15, 19, 20, 30], dtype=int64),
 3: array([1], dtype=int64),
 4: array([21], dtype=int64),
 5: array([ 2, 28], dtype=int64),
 6: array([ 5, 10, 11, 12, 24, 26, 27], dtype=int64),
 7: array([8], dtype=int64),
 8: array([17], dtype=int64),
 9: array([16], dtype=int64)}

In [5]:
def avg_weights_with_noise_fedprox(weight_list, clip_threshold=None, noise_scale=0.001, proximal_term=0.1):
    avg_grad = list()

    for grad_list_tuple in zip(*weight_list):
        layer_mean = tf.math.reduce_mean(grad_list_tuple, axis=0)

        if clip_threshold is not None:
            layer_mean = tf.clip_by_value(layer_mean, -clip_threshold, clip_threshold)

        noise = tf.random.normal(shape=layer_mean.shape, mean=0.0, stddev=noise_scale)
        noisy_layer_mean = layer_mean + noise

        # Add FedProx proximal term
        proximal_update = -proximal_term * noisy_layer_mean

        avg_grad.append(noisy_layer_mean + proximal_update)

    return avg_grad

### Federated Training

In [6]:
tf.compat.v1.reset_default_graph()

batch_size = 1
replay_buffer_capacity = 100000
initial_collect_steps = 1000
collect_steps_per_iteration = 1000 #2000
num_iterations = 10 #1500

weights = {"actor_net": {}, "critic_net": {}, "target_actor_network": {}, "target_critic_network": {}}

for cluster in range(num_clusters):
    #Build and save global model
    print("Cluster: ", cluster)
    
    # 1. Build global networks
    global_step = tf.compat.v1.train.create_global_step()
    first_building_in_cluster = cluster_users[cluster][0]

    global_actor_net = ActorNetwork(
        observation_spec=environments["train"][f"building_{1}"].observation_spec(),
        action_spec=environments["train"][f"building_{1}"].action_spec(),
        custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
        )

    global_critic_net = CriticNetwork(
        observation_spec=environments["train"][f"building_{first_building_in_cluster}"].observation_spec(),
        action_spec=environments["train"][f"building_{first_building_in_cluster}"].action_spec(),
        custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
        )

    global_target_actor_network = ActorNetwork(
        observation_spec=environments["train"][f"building_{first_building_in_cluster}"].observation_spec(),
        action_spec=environments["train"][f"building_{first_building_in_cluster}"].action_spec(),
        custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
        )

    global_target_critic_network = CriticNetwork(
        observation_spec=environments["train"][f"building_{first_building_in_cluster}"].observation_spec(),
        action_spec=environments["train"][f"building_{first_building_in_cluster}"].action_spec(),
        custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
        )

    tf_agent = ddpg_agent.DdpgAgent(
        environments["train"][f"building_{first_building_in_cluster}"].time_step_spec(),
        environments["train"][f"building_{first_building_in_cluster}"].action_spec(),
        actor_network= global_actor_net,
        critic_network= global_critic_net,
        actor_optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4), #-2 bis -4
        critic_optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=1e-3), #-1 bis -3
        ou_stddev=0.2, #0.3, # 0.2 , 0.3,
        ou_damping=0.15, #0.15, #0.15,
        target_actor_network= global_target_actor_network,
        target_critic_network= global_target_critic_network,
        target_update_tau=0.05, # 0.005, 0.01, 0.05,
        target_update_period=10, # 5, 20, 50
        dqda_clipping=1,
        td_errors_loss_fn= tf.compat.v1.losses.huber_loss, #tf.keras.losses.MeanSquaredError(),
        gamma=0.99, # 0.9, 0.99
        reward_scale_factor=10, # 1.0,
        train_step_counter=global_step,
    )

    tf_agent.initialize()
    collect_policy = tf_agent.collect_policy
    
    # 2. Store weights
    weights["actor_net"][cluster] = global_actor_net.get_weights()
    weights["critic_net"][cluster] = global_critic_net.get_weights()
    weights["target_actor_network"][cluster] = global_target_actor_network.get_weights()
    weights["target_critic_network"][cluster] = global_target_critic_network.get_weights()

    for federated_round  in range(federated_rounds):
        print("Started Federated training round ----------", federated_round+1, f"/ {federated_rounds}")
        for cluster_number, users_in_cluster in cluster_users.items():
            print(f"Cluster {cluster_number}:")
            for user_index in users_in_cluster:
                tf.compat.v1.reset_default_graph()
                print("User index: ", user_index)
                
                #Actor setup
                local_actor_net = ActorNetwork(
                    observation_spec=environments["train"][f"building_{1}"].observation_spec(),
                    action_spec=environments["train"][f"building_{1}"].action_spec(),
                    custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
                    )
                
                weights_list = [weights["actor_net"][cluster] for cluster in weights["actor_net"]]
                averaged_weights  = avg_weights_with_noise_fedprox(weights_list)
                #Build
                dummy_observation = tf.random.uniform(shape=[1] + environments["train"][f"building_{first_building_in_cluster}"].observation_spec().shape)
                _ = local_actor_net(dummy_observation)
                local_actor_net.set_weights(averaged_weights)

                #Critic setup
                local_critic_net = CriticNetwork(
                    observation_spec=environments["train"][f"building_{first_building_in_cluster}"].observation_spec(),
                    action_spec=environments["train"][f"building_{first_building_in_cluster}"].action_spec(),
                    custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
                    )
                weights_list = [weights["critic_net"][cluster] for cluster in weights["critic_net"]]
                averaged_weights  = avg_weights_with_noise_fedprox(weights_list)
                #Build
                observation_spec = environments["train"][f"building_{first_building_in_cluster}"].observation_spec()
                dummy_observation = tf.random.uniform(shape=[1] + observation_spec.shape)
                action_spec = environments["train"][f"building_{first_building_in_cluster}"].action_spec()
                dummy_action = tf.random.uniform(shape=[1] + list(action_spec.shape), minval=action_spec.minimum, maxval=action_spec.maximum)
                dummy_input = (dummy_observation, dummy_action)
                _ = local_critic_net(dummy_input)
                local_critic_net.set_weights(averaged_weights)

                #Target actor setup
                local_target_actor_network = ActorNetwork(
                    observation_spec=environments["train"][f"building_{first_building_in_cluster}"].observation_spec(),
                    action_spec=environments["train"][f"building_{first_building_in_cluster}"].action_spec(),
                    custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
                    )
                weights_list = [weights["target_actor_network"][cluster] for cluster in weights["target_actor_network"]]
                averaged_weights  = avg_weights_with_noise_fedprox(weights_list)
                #Build
                dummy_observation = tf.random.uniform(shape=[1] + environments["train"][f"building_{first_building_in_cluster}"].observation_spec().shape)
                _ = local_target_actor_network(dummy_observation)
                local_target_actor_network.set_weights(averaged_weights)

                local_target_critic_network = CriticNetwork(
                    observation_spec=environments["train"][f"building_{first_building_in_cluster}"].observation_spec(),
                    action_spec=environments["train"][f"building_{first_building_in_cluster}"].action_spec(),
                    custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
                    )
                weights_list = [weights["target_critic_network"][cluster] for cluster in weights["target_critic_network"]]
                averaged_weights  = avg_weights_with_noise_fedprox(weights_list)
                #Build
                observation_spec = environments["train"][f"building_{first_building_in_cluster}"].observation_spec()
                dummy_observation = tf.random.uniform(shape=[1] + observation_spec.shape)
                action_spec = environments["train"][f"building_{first_building_in_cluster}"].action_spec()
                dummy_action = tf.random.uniform(shape=[1] + list(action_spec.shape), minval=action_spec.minimum, maxval=action_spec.maximum)
                dummy_input = (dummy_observation, dummy_action)
                _ = local_target_critic_network(dummy_input)
                local_target_critic_network.set_weights(averaged_weights)

                local_tf_agent = ddpg_agent.DdpgAgent(
                    environments["train"][f"building_{first_building_in_cluster}"].time_step_spec(),
                    environments["train"][f"building_{first_building_in_cluster}"].action_spec(),
                    actor_network= global_actor_net,
                    critic_network= global_critic_net,
                    actor_optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4), #-2 bis -4
                    critic_optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=1e-3), #-1 bis -3
                    ou_stddev=0.2, #0.3, # 0.2 , 0.3,
                    ou_damping=0.15, #0.15, #0.15,
                    target_actor_network= global_target_actor_network,
                    target_critic_network= global_target_critic_network,
                    target_update_tau=0.05, # 0.005, 0.01, 0.05,
                    target_update_period=10, # 5, 20, 50
                    dqda_clipping=1,
                    td_errors_loss_fn= tf.compat.v1.losses.huber_loss, #tf.keras.losses.MeanSquaredError(),
                    gamma=0.99, # 0.9, 0.99
                    reward_scale_factor=10, # 1.0,
                    train_step_counter=global_step,
                )
                # Setup for training
                replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
                    data_spec=tf_agent.collect_data_spec,
                    batch_size= environments["train"][f"building_{user_index}"].batch_size,
                    max_length=replay_buffer_capacity,
                )

                initial_collect_driver = dynamic_step_driver.DynamicStepDriver(
                    environments["train"][f"building_{user_index}"],
                    collect_policy,
                    observers=[replay_buffer.add_batch],
                    num_steps=initial_collect_steps,
                )

                collect_driver = dynamic_step_driver.DynamicStepDriver(
                    environments["train"][f"building_{user_index}"],
                    collect_policy,
                    observers=[replay_buffer.add_batch],
                    num_steps=collect_steps_per_iteration,
                )

                wandb.login()
                wandb.init(
                    project="DDPG_battery_testing",
                    job_type="train_eval_test",
                    name=f"Exp_building{user_index}",
                    config={
                        "train_steps": num_iterations,
                        "batch_size": batch_size,
                        "actor_learning_rate": 1e-4,
                        "critic_learning_rate": 1e-3}
                )
                artifact = wandb.Artifact(name='save', type="checkpoint")

                eval_metrics = [tf_metrics.AverageReturnMetric(batch_size=batch_size)]
                test_metrics = [tf_metrics.AverageReturnMetric(batch_size=batch_size)]

                initial_collect_driver.run = common.function(initial_collect_driver.run)
                collect_driver.run = common.function(collect_driver.run)
                tf_agent.train = common.function(tf_agent.train)

                # Collect initial replay data
                initial_collect_driver.run()
                time_step = environments["train"][f"building_{user_index}"].reset()
                policy_state = collect_policy.get_initial_state(environments["train"][f"building_{user_index}"].batch_size)

                # pipeline which will feed data to the agent
                dataset = replay_buffer.as_dataset(num_parallel_calls=3, sample_batch_size=batch_size, num_steps=2).prefetch(3)
                iterator = iter(dataset)
                
                # Train and evaluate
                print(f"Start training building {user_index}")
                while global_step.numpy() < num_iterations:
                    print(global_step.numpy(), "/ ", num_iterations)
                    time_step, policy_state = collect_driver.run(
                        time_step=time_step,
                        policy_state=policy_state,
                    )
                    experience, _ = next(iterator)
                    train_loss = tf_agent.train(experience)

                    if global_step.numpy() % 2 == 0:
                        metrics = {}    
                        metrics["Loss"] = train_loss.loss
                        wandb.log(metrics)
                
                
                weights["actor_net"][cluster] = tf_agent._actor_network.get_weights()
                weights["critic_net"][cluster] = tf_agent._critic_network.get_weights()
                weights["target_actor_network"][cluster] = tf_agent._target_actor_network.get_weights()  # Assuming protected access
                weights["target_critic_network"][cluster] = tf_agent._target_critic_network.get_weights()  # Assuming protected access
                tf.compat.v1.reset_default_graph()


Cluster:  0
Started Federated training round ---------- 1 / 3
Cluster 0:
User index:  7
Start training building 7
0 /  10
1 /  10
2 /  10
3 /  10
4 /  10
5 /  10
6 /  10
7 /  10
8 /  10
9 /  10
User index:  14
Start training building 14
User index:  18
Start training building 18
User index:  22
Start training building 22
User index:  23


In [None]:
print("Start testing ...")
metrics = metric_utils.eager_compute(
    test_metrics,
    environments["test"][f"building_{idx+1}"],
    tf_agent.policy,
    num_episodes=batch_size)
logging = {}    
logging["AverageReturn"] = metrics['AverageReturn'].numpy()
wandb.log(logging)
#artifact.add_dir(local_path='checkpoints/ddpg/')
wandb.log_artifact(artifact)
wandb.finish()
tf.compat.v1.reset_default_graph()