In [1]:
# Imports

import logging
import os
logging.getLogger("wandb").setLevel(logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.ERROR)
os.environ['WANDB_SILENT'] = 'true'
os.environ['WANDB_CONSOLE'] = 'off'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np


import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from tf_agents.agents.ddpg import ddpg_agent
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import tf_py_environment, py_environment, batched_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.utils import common
import matplotlib.pyplot as plt
import wandb

import sys
sys.path.insert(0, '..')
from environments.EnergyManagementEnv import EnergyManagementEnv
from utils.agentNetworks import ActorNetwork, CriticNetwork, CustomLayers
import utils.dataloader as DL





In [2]:
# Number of buildings whose load/pv columns are read from the energy table
num_buildings = 30

# Read the energy table, index it by the 'Date' column and replace missing values with 0
energy_data = (
    pd.read_csv("../../data/3final_data/Final_Energy_dataset.csv", header=0)
    .set_index('Date')
    .fillna(0)
)

# Positional row range (start, stop) of every data split
split_rows = {"train": (0, 17520), "eval": (17520, 35088), "test": (35088, 52608)}
# Extra keyword arguments of the environment per split: only the test
# environment is created with logging=True
split_env_kwargs = {"train": {}, "eval": {}, "test": {"logging": True}}

dataset = {split_name: {} for split_name in split_rows}
environments = {split_name: {} for split_name in split_rows}

for building_number in range(1, num_buildings + 1):
    building_key = f"building_{building_number}"
    # Per-building view: own load and pv columns plus the shared price and fuelmix columns
    user_data = energy_data[[f'load_{building_number}', f'pv_{building_number}', 'price', 'fuelmix']]

    for split_name, (first_row, last_row) in split_rows.items():
        # Slice the split and re-index it from 0 so every split starts at row 0
        split_frame = user_data[first_row:last_row].set_index(pd.RangeIndex(0, last_row - first_row))
        dataset[split_name][building_key] = split_frame
        environments[split_name][building_key] = tf_py_environment.TFPyEnvironment(
            EnergyManagementEnv(init_charge=0.0, data=split_frame, **split_env_kwargs[split_name])
        )

# Sanity output based on the training environment of building 1
reference_env = environments["train"]["building_1"]
print("Batch size: ", reference_env.batch_size)
print("State Space: {}, Action Space: {}".format(reference_env.observation_spec().shape[0], reference_env.action_spec().shape[0])) #SoE, price, price forecast 1-6
print("Upper bound: {}".format(round(reference_env.action_spec().maximum.item(), 3)))
dataset["test"]["building_1"].head(1)

Batch size:  1
State Space: 6, Action Space: 1
Upper bound: 2.3


Unnamed: 0,load_1,pv_1,price,fuelmix
0,1.149,0.0,0.05704,0.530991


In [3]:
# Number of federated rounds; the federated training loop below iterates
# range(federated_rounds) and runs every cluster once per round.
federated_rounds = 10

In [4]:
# One integer cluster label per entry of the clustering file (entry i belongs to building i+1)
y = np.loadtxt('../../data/3final_data/Clusters_KMeans10_dtw.csv', delimiter=',').astype(int)
num_clusters = 10

# Map every cluster id to the array of 1-based building numbers carrying that label
cluster_buildings = {
    cluster_id: np.where(y == cluster_id)[0] + 1
    for cluster_id in range(num_clusters)
}
cluster_buildings

{0: array([ 7, 14, 18, 22, 23, 25, 29], dtype=int64),
 1: array([6], dtype=int64),
 2: array([ 3,  4,  9, 13, 15, 19, 20, 30], dtype=int64),
 3: array([1], dtype=int64),
 4: array([21], dtype=int64),
 5: array([ 2, 28], dtype=int64),
 6: array([ 5, 10, 11, 12, 24, 26, 27], dtype=int64),
 7: array([8], dtype=int64),
 8: array([17], dtype=int64),
 9: array([16], dtype=int64)}

In [5]:
from tf_agents.agents import ddpg

def get_ddpg_agent(observation_spec, action_spec, custom_layers, global_step): 
    """Build and initialize a DDPG agent with freshly created networks.

    Four networks are created (actor, critic, target actor, target critic),
    each with two fully connected layers of 256 units and ReLU activation.

    Args:
        observation_spec: Observation spec; input spec of the actor networks
            and, paired with ``action_spec``, of the critic networks.
        action_spec: Action spec; output spec of the actor networks, part of
            the critic input spec, and the action spec of the agent itself.
        custom_layers: Currently unused. It was consumed by the project's own
            ActorNetwork/CriticNetwork variant, which is no longer used here;
            the parameter is kept so existing callers keep working.
        global_step: Variable handed to the agent as ``train_step_counter``.

    Returns:
        Tuple ``(tf_agent, eval_policy, collect_policy)`` where the policies
        are ``tf_agent.policy`` and ``tf_agent.collect_policy``.
    """

    def build_actor():
        # Actor: observation -> action
        return ddpg.actor_network.ActorNetwork(
            input_tensor_spec=observation_spec,
            output_tensor_spec=action_spec,
            fc_layer_params=(256, 256),
            activation_fn=tf.keras.activations.relu)

    def build_critic():
        # Critic: (observation, action) -> value estimate
        return ddpg.critic_network.CriticNetwork(
            input_tensor_spec=(observation_spec, action_spec),
            joint_fc_layer_params=(256, 256),
            activation_fn=tf.keras.activations.relu)

    # Creation order matches the original: actor, critic, target actor, target critic
    actor_net = build_actor()
    critic_net = build_critic()
    target_actor_network = build_actor()
    target_critic_network = build_critic()

    agent_params = {
        # NOTE(review): no time-step spec is passed in, so it is still read from
        # the module-level `environments` (building 1). This assumes every
        # building environment shares the same time-step spec - confirm.
        "time_step_spec": environments["train"]["building_1"].time_step_spec(),
        # Use the spec the caller passed (and the networks were built with)
        # instead of silently substituting building 1's action spec.
        "action_spec": action_spec,
        "actor_network": actor_net,
        "critic_network": critic_net,
        "actor_optimizer": tf.compat.v1.train.AdamOptimizer(learning_rate=1e-3), #1e-3
        "critic_optimizer": tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4), #1e-2
        "ou_stddev": 0.9, #0.9,
        "ou_damping": 0.15,
        "target_actor_network": target_actor_network,
        "target_critic_network": target_critic_network,
        "target_update_tau": 0.05,
        "target_update_period": 100, #5,
        "dqda_clipping": 0.5,
        "td_errors_loss_fn": tf.compat.v1.losses.huber_loss,
        "gamma": 1, #0.99,
        "reward_scale_factor": 1,
        "train_step_counter": global_step,
    }

    # Create the DdpgAgent with unpacked parameters
    tf_agent = ddpg_agent.DdpgAgent(**agent_params)
    tf_agent.initialize()

    return tf_agent, tf_agent.policy, tf_agent.collect_policy

In [6]:
def initialize_wandb_logging(project="DDPG_battery_testing", name="Exp", num_iterations=1500, batch_size=1, a_lr="1e-4", c_lr="1e-3"):
    """Log in to Weights & Biases, start a run and return a checkpoint artifact.

    Args:
        project: W&B project the run is created in.
        name: Display name of the run.
        num_iterations: Logged in the run config as ``train_steps``.
        batch_size: Logged in the run config as ``batch_size``.
        a_lr: Actor learning rate (number or numeric string); logged as
            ``actor_learning_rate``.
        c_lr: Critic learning rate (number or numeric string); logged as
            ``critic_learning_rate``.

    Returns:
        A ``wandb.Artifact`` named ``'save'`` of type ``"checkpoint"``. Nothing
        is added to it here; the caller decides what to attach and log.

    Note:
        The config values are metadata only and do not configure the agent.
        The defaults do not match the optimizers created in ``get_ddpg_agent``
        (actor 1e-3, critic 1e-4), so pass the real values for correct logs.
    """
    wandb.login()
    wandb.init(
        # Honour the caller's project instead of a hard-coded name
        project=project,
        job_type="train_eval_test",
        name=name,
        config={
            "train_steps": num_iterations,
            "batch_size": batch_size,
            # Log the rates that were passed in (previously fixed constants
            # were logged and a_lr / c_lr were ignored); float() accepts the
            # string defaults as well as plain numbers.
            "actor_learning_rate": float(a_lr),
            "critic_learning_rate": float(c_lr)}
    )

    return wandb.Artifact(name='save', type="checkpoint")

In [7]:
def setup_rl_training_pipeline(tf_agent, env_train, replay_buffer_capacity,collect_policy, initial_collect_steps, collect_steps_per_iteration, batch_size):
    """Create the replay buffer, fill it with initial experience and prepare sampling.

    Args:
        tf_agent: Agent whose ``collect_data_spec`` defines the buffer layout;
            its ``train`` method is replaced by a ``common.function`` wrapper.
        env_train: Training environment the drivers step through.
        replay_buffer_capacity: ``max_length`` of the replay buffer.
        collect_policy: Policy used by both drivers to collect experience.
        initial_collect_steps: Number of steps of the one-off initial collection.
        collect_steps_per_iteration: Number of steps per call of the returned driver.
        batch_size: Number of samples per mini-batch drawn from the buffer.

    Returns:
        Tuple ``(iterator, collect_driver, time_step, policy_state)``: an
        iterator over sampled mini-batches, the driver for ongoing collection,
        the time step returned by resetting the environment after the initial
        collection, and the initial state of the collect policy.
    """
    # Uniform buffer: every stored sample has the same chance of being drawn
    experience_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
        data_spec=tf_agent.collect_data_spec,
        batch_size=env_train.batch_size,
        max_length=replay_buffer_capacity,
    )

    def make_step_driver(step_count):
        # Driver that runs `step_count` environment steps and writes them to the buffer
        return dynamic_step_driver.DynamicStepDriver(
            env=env_train,
            policy=collect_policy,
            observers=[experience_buffer.add_batch],
            num_steps=step_count,
        )

    # One driver for the warm-up fill, one for collection during training
    warmup_driver = make_step_driver(initial_collect_steps)
    collect_driver = make_step_driver(collect_steps_per_iteration)

    # Wrap the hot paths with common.function for better performance
    warmup_driver.run = common.function(warmup_driver.run)
    collect_driver.run = common.function(collect_driver.run)
    tf_agent.train = common.function(tf_agent.train)

    # Fill the buffer first, then start training from a freshly reset environment
    warmup_driver.run()
    time_step = env_train.reset()
    policy_state = collect_policy.get_initial_state(env_train.batch_size)

    # Mini-batches of 2-step sequences sampled from the buffer, prefetched in the background
    experience_dataset = experience_buffer.as_dataset(
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
        sample_batch_size=batch_size,
        num_steps=2,
    ).prefetch(tf.data.experimental.AUTOTUNE)

    return iter(experience_dataset), collect_driver, time_step, policy_state

In [8]:
# Clear the default TF graph before configuring the training runs
tf.compat.v1.reset_default_graph()

# Training hyper-parameters shared by the federated and the evaluation cells
batch_size = 256  # mini-batch size sampled from the replay buffer (sample_batch_size)
replay_buffer_capacity = 20000 #-> only <18.000 samples per dataset
initial_collect_steps = 2000  # steps collected into the buffer before training starts
collect_steps_per_iteration = 20  # environment steps collected per training iteration
num_iterations = 10000 #10000  (training loops run while global_step < num_iterations)
eval_interval = 9000 #3000  (evaluate whenever global_step % eval_interval == 0)

In [9]:
# Also possible: attention scores that are rank-based, or a soft-max without normalization
def calculate_mean_weights_with_noise(weights_list, noise_scale=0):
    """Average the weights of several models layer by layer and add Gaussian noise.

    Args:
        weights_list: One entry per model; each entry is that model's list of
            weight arrays (as returned by ``get_weights()``). All models must
            have the same number of arrays with matching shapes.
        noise_scale: Standard deviation of the zero-mean Gaussian noise added
            to every averaged array. The default 0 adds no perturbation.

    Returns:
        List of tensors, one per weight array, holding the element-wise mean
        over all models plus noise. Empty if ``weights_list`` is empty.
    """
    mean_weights = []

    # zip(*...) groups the arrays that sit at the same position in every model
    for layer_weights in zip(*weights_list):
        # 1. Average over the model axis
        mean_weight = tf.math.reduce_mean(tf.convert_to_tensor(layer_weights), axis=0)

        # 2. Perturb the mean with Gaussian noise
        noise = tf.random.normal(shape=mean_weight.shape, mean=0.0, stddev=noise_scale)
        mean_weights.append(mean_weight + noise)

    return mean_weights


def calculate_mean_weights_with_softmax_attention(weights_list, performance_metrics, noise_scale=0.3):
    """Aggregate model weights with softmax attention over performance metrics.

    The metrics are standardized (zero mean, unit standard deviation) and
    passed through a softmax; each model's weights are scaled by its score,
    perturbed with Gaussian noise and summed.

    Args:
        weights_list: One entry per model; each entry is that model's list of
            weight arrays (as returned by ``get_weights()``). All models must
            have the same number of arrays with matching shapes.
        performance_metrics: One scalar per model, in the same order as
            ``weights_list``. A higher value yields a higher attention score.
        noise_scale: Standard deviation of the zero-mean Gaussian noise added
            to each model's weighted contribution. Because noise is drawn once
            per model, the noise of the sum grows with the number of models.

    Returns:
        List of tensors, one per weight array, holding the attention-weighted
        noisy sum. Empty if ``weights_list`` is empty.

    Raises:
        ValueError: If the number of models and the number of metrics differ.
    """
    if len(weights_list) != len(performance_metrics):
        raise ValueError(
            "weights_list and performance_metrics must have the same length, got "
            f"{len(weights_list)} and {len(performance_metrics)}"
        )
    if len(weights_list) == 0:
        return []

    # Calculate standardized attention scores
    performance_metrics_tensor = tf.convert_to_tensor(performance_metrics, dtype=tf.float32)
    centered_metrics = performance_metrics_tensor - tf.reduce_mean(performance_metrics_tensor)
    # With a single model (or identical metrics) the standard deviation is 0.
    # A plain division would give 0/0 = NaN and turn every aggregated weight
    # into NaN; divide_no_nan yields 0 instead, i.e. uniform attention.
    standardized_metrics = tf.math.divide_no_nan(
        centered_metrics, tf.math.reduce_std(performance_metrics_tensor)
    )
    attention_scores = tf.nn.softmax(standardized_metrics)

    mean_weights = []

    # zip(*...) groups the arrays that sit at the same position in every model
    for layer_weights in zip(*weights_list):
        stacked_weights = tf.stack(layer_weights, axis=0)
        weighted_mean_weight = tf.zeros_like(stacked_weights[0])

        # Accumulate every model's attention-weighted, noised contribution
        for model_weights, attention_score in zip(stacked_weights, attention_scores):
            noise = tf.random.normal(shape=model_weights.shape, mean=0.0, stddev=noise_scale)
            weighted_mean_weight += model_weights * attention_score + noise

        mean_weights.append(weighted_mean_weight)

    return mean_weights

In [10]:
# In-memory store of the global weights: network key -> cluster id -> list of weight arrays
global_weights = {"actor_net": {}, "critic_net": {}, "target_actor_network": {}, "target_critic_network": {}}

# (key in global_weights, network attribute on the agent, file name of the saved weights)
network_layout = (
    ("actor_net", "_actor_network", "actor_network_weights.npz"),
    ("critic_net", "_critic_network", "critic_weights.npz"),
    ("target_actor_network", "_target_actor_network", "target_actor_weights.npz"),
    ("target_critic_network", "_target_critic_network", "target_critic_weights.npz"),
)

# Initialize one global model for each cluster of similar buildings
for cluster in range(num_clusters):
    # 1. Build the global agent of this cluster from the specs of its first building
    global_step = tf.compat.v1.train.get_or_create_global_step()
    first_building_in_cluster = cluster_buildings[cluster][0]
    reference_env = environments["train"][f"building_{first_building_in_cluster}"]

    global_tf_agent, global_eval_policy, global_collect_policy = get_ddpg_agent(
        observation_spec=reference_env.observation_spec(),
        action_spec=reference_env.action_spec(),
        custom_layers=[CustomLayers.get_dense_layers(layers=1, units=32)],
        global_step=global_step,
    )

    # 2. Keep the initial weights in memory and persist them as federated round 0
    model_dir = os.path.join(os.getcwd(), f"models/cluster_{cluster}/FLround{0}")
    os.makedirs(model_dir, exist_ok=True)

    for weights_key, agent_attribute, file_name in network_layout:
        initial_weights = getattr(global_tf_agent, agent_attribute).get_weights()
        global_weights[weights_key][cluster] = initial_weights
        np.savez(os.path.join(model_dir, file_name), *initial_weights)

In [11]:
# Start Federated Learning.
#
# Per round and cluster: every building trains a local agent that starts from
# the cluster's current global weights; the local weights are then aggregated
# into the new global weights of that cluster, kept in `global_weights` for
# the next round and saved to disk.

# (key in global_weights, network attribute on the agent, file name of the saved weights)
FEDERATED_NETWORKS = (
    ("actor_net", "_actor_network", "actor_network_weights.npz"),
    ("critic_net", "_critic_network", "critic_weights.npz"),
    ("target_actor_network", "_target_actor_network", "target_actor_weights.npz"),
    ("target_critic_network", "_target_critic_network", "target_critic_weights.npz"),
)

for federated_round  in range(federated_rounds):

    # Iterate through each cluster
    for cluster_number, buildings_in_cluster in cluster_buildings.items():
        print(f"Cluster {cluster_number}: Buildings {buildings_in_cluster} Federated round ----------", federated_round+1, f"/ {federated_rounds}")

        # Local weights of every building, grouped per network
        local_weight_lists = {weights_key: [] for weights_key, _, _ in FEDERATED_NETWORKS}
        performance_metrics = []

        # Iterate through the buildings of the cluster
        for building_index in buildings_in_cluster:

            # 0. Reset global step
            tf.compat.v1.reset_default_graph()
            global_step = tf.compat.v1.train.get_or_create_global_step()

            # 1. Initialize the local agent ...
            building_env = environments["train"][f"building_{building_index}"]
            local_tf_agent, local_eval_policy, local_collect_policy = get_ddpg_agent(
                observation_spec = building_env.observation_spec(),
                action_spec = building_env.action_spec(),
                custom_layers = [CustomLayers.get_dense_layers(layers=1, units=32)],
                global_step = global_step
                )

            # ... and start it from the current global weights of THIS cluster.
            # The lookup is keyed by `cluster_number`; a leftover loop variable
            # from the initialization cell was used before, which made every
            # building start from the last cluster's initial weights.
            for weights_key, agent_attribute, _ in FEDERATED_NETWORKS:
                getattr(local_tf_agent, agent_attribute).set_weights(global_weights[weights_key][cluster_number])

            # 2. Prepare training pipeline: setup iterator, replay buffer, driver
            local_iterator, local_collect_driver, local_time_step, local_policy_state = setup_rl_training_pipeline(
                local_tf_agent, building_env, replay_buffer_capacity, local_collect_policy, initial_collect_steps, collect_steps_per_iteration, batch_size
                )

            # 3. Local training until the step counter reaches num_iterations
            while global_step.numpy() < num_iterations:
                local_time_step, local_policy_state = local_collect_driver.run(time_step=local_time_step, policy_state=local_policy_state)
                local_experience, _ = next(local_iterator)
                local_train_loss = local_tf_agent.train(local_experience)

            # 4. Score the trained local policy on the building's eval split;
            #    the return is the performance metric used for the attention scores
            test_metrics = [tf_metrics.AverageReturnMetric()]
            metrics = metric_utils.eager_compute(test_metrics,environments["eval"][f"building_{building_index}"], local_eval_policy, num_episodes=1)
            print("Return: ", metrics["AverageReturn"].numpy())
            performance_metrics.append(metrics["AverageReturn"].numpy())

            # 5. Collect the local agent's weights
            for weights_key, agent_attribute, _ in FEDERATED_NETWORKS:
                local_weight_lists[weights_key].append(getattr(local_tf_agent, agent_attribute).get_weights())

        # Perform federated aggregation
        print("Performance List: ", performance_metrics)

        model_dir = os.path.join(os.getcwd(), f"models/cluster_{cluster_number}/FLround{federated_round+1}_WAAwN")
        os.makedirs(model_dir, exist_ok=True)

        for weights_key, _, file_name in FEDERATED_NETWORKS:
            if len(performance_metrics) == 1:
                # A single participant leaves nothing to aggregate, and the
                # standardization of one metric is undefined (zero standard
                # deviation), so the local weights become the global ones.
                aggregated_weights = local_weight_lists[weights_key][0]
            else:
                aggregated_weights = calculate_mean_weights_with_softmax_attention(local_weight_lists[weights_key], performance_metrics)

            # Store as numpy arrays so they can be passed to set_weights / np.savez
            aggregated_weights = [np.asarray(weight) for weight in aggregated_weights]

            # Feed the aggregate into the next round and persist it
            global_weights[weights_key][cluster_number] = aggregated_weights
            np.savez(os.path.join(model_dir, file_name), *aggregated_weights)

Cluster 0: Buildings [ 7 14 18 22 23 25 29] Federated round ---------- 1 / 10
Return:  4955.4487
Return:  4781.4746
Return:  4235.6245
Return:  4491.1934
Return:  4478.134
Return:  -192051.2
Return:  -191878.56
Performance List:  [4955.4487, 4781.4746, 4235.6245, 4491.1934, 4478.134, -192051.2, -191878.56]
Cluster 1: Buildings [6] Federated round ---------- 1 / 10
Return:  4083.3286
Performance List:  [4083.3286]
Cluster 2: Buildings [ 3  4  9 13 15 19 20 30] Federated round ---------- 1 / 10
Return:  4648.6763
Return:  4337.7705
Return:  4456.5757
Return:  -191450.55
Return:  4600.381
Return:  4810.3867
Return:  4794.2197
Return:  4618.5093
Performance List:  [4648.6763, 4337.7705, 4456.5757, -191450.55, 4600.381, 4810.3867, 4794.2197, 4618.5093]
Cluster 3: Buildings [1] Federated round ---------- 1 / 10
Return:  5425.6606
Performance List:  [5425.6606]
Cluster 4: Buildings [21] Federated round ---------- 1 / 10
Return:  3420.2805
Performance List:  [3420.2805]
Cluster 5: Buildings [ 

In [10]:
# Fine-tune the saved global model of each cluster on every building, test it
# on the building's test split and collect the resulting "Total Profit".

# Number of independent fine-tune/test runs per building
num_rounds=1

# Federated round whose saved global weights are used as the starting point.
# NOTE(review): the federated training cell saves to "FLround{n}_WAAwN"
# directories, so "FLround3" must exist from another run - confirm the path.
pretrained_fl_round = 3

# (network attribute on the agent, file name of the saved weights)
pretrained_weight_files = (
    ("_actor_network", "actor_network_weights.npz"),
    ("_critic_network", "critic_weights.npz"),
    ("_target_actor_network", "target_actor_weights.npz"),
    ("_target_critic_network", "target_critic_weights.npz"),
)

result_columns = ['Building', 'Total Profit']
result_records = []
result_df = pd.DataFrame(result_records, columns=result_columns)

try:
    for cluster_number, buildings_in_cluster in cluster_buildings.items():

        for building_index in buildings_in_cluster:

            # `run_index` instead of `round`: the latter would shadow the
            # builtin round() that the data-loading cell calls.
            for run_index in range(num_rounds):
                print("Cluster: ", cluster_number, " - Building: ", building_index, " - round: ", run_index)

                # 0. Reset global step
                tf.compat.v1.reset_default_graph()
                global_step = tf.compat.v1.train.get_or_create_global_step()

                # 1. Initialize the agent and load the trained global weights
                building_env = environments["train"][f"building_{building_index}"]
                federated_tf_agent, federated_eval_policy, federated_collect_policy = get_ddpg_agent(
                    observation_spec = building_env.observation_spec(),
                    action_spec = building_env.action_spec(),
                    custom_layers = [CustomLayers.get_dense_layers(layers=1, units=32)],
                    global_step = global_step
                    )

                model_dir = os.path.join(os.getcwd(), f"models/cluster_{cluster_number}/FLround{pretrained_fl_round}")

                for agent_attribute, file_name in pretrained_weight_files:
                    with np.load(os.path.join(model_dir, file_name), allow_pickle=True) as data:
                        # np.savez stored the arrays as arr_0, arr_1, ...; restore them in that order
                        getattr(federated_tf_agent, agent_attribute).set_weights(
                            [data[f'arr_{i}'] for i in range(len(data.files))]
                        )

                # Setup iterator, replay buffer, driver
                iterator, collect_driver, time_step, policy_state = setup_rl_training_pipeline(
                    federated_tf_agent, building_env, replay_buffer_capacity, federated_collect_policy, initial_collect_steps, collect_steps_per_iteration, batch_size
                    )

                # Setup wandb logging
                artifact = initialize_wandb_logging(name=f"Exp_building{building_index}_rd{run_index}", num_iterations=num_iterations)

                # 2. Train and evaluate
                eval_metrics = [tf_metrics.AverageReturnMetric()]
                test_metrics = [tf_metrics.AverageReturnMetric()]

                while global_step.numpy() < num_iterations:

                    time_step, policy_state = collect_driver.run(time_step=time_step, policy_state=policy_state)
                    experience, _ = next(iterator)
                    train_loss = federated_tf_agent.train(experience)

                    metrics = {}
                    if global_step.numpy() % eval_interval == 0:
                        metrics = metric_utils.eager_compute(eval_metrics,environments["eval"][f"building_{building_index}"],
                            federated_eval_policy,num_episodes=1,train_step=global_step,summary_writer=None,summary_prefix='',use_function=True)

                    # Log the loss (plus eval metrics, if just computed) every second step
                    if global_step.numpy() % 2 == 0:
                        metrics["loss"] = train_loss.loss
                        wandb.log(metrics)

                # 3. Start testing
                metrics = metric_utils.eager_compute(test_metrics,environments["test"][f"building_{building_index}"], federated_eval_policy, num_episodes=1)
                # "Total Profit" is read from the wandb run summary.
                # NOTE(review): presumably written by the test environment
                # (created with logging=True) - confirm.
                total_profit = wandb.summary["Total Profit"]
                print('Building: ', building_index, ' - Total Profit: ', total_profit)
                result_records.append({'Building': building_index, 'Total Profit': total_profit})
                wandb.log(metrics)
                wandb.log_artifact(artifact)
                wandb.finish()
finally:
    # Build the frame once from the collected records (no concat per
    # iteration); `finally` keeps partial results if the run is interrupted.
    result_df = pd.DataFrame(result_records, columns=result_columns)

Cluster:  0  - Building:  7  - round:  0
Building:  7  - Total Profit:  394.6404406116863
Cluster:  0  - Building:  14  - round:  0
Building:  14  - Total Profit:  -130.95716455000016
Cluster:  0  - Building:  18  - round:  0
Building:  18  - Total Profit:  -311.0430112700002
Cluster:  0  - Building:  22  - round:  0
Building:  22  - Total Profit:  -156.31512790999983
Cluster:  0  - Building:  23  - round:  0
Building:  23  - Total Profit:  264.5340516986598
Cluster:  0  - Building:  25  - round:  0
Building:  25  - Total Profit:  -297.6910558299997
Cluster:  0  - Building:  29  - round:  0
Building:  29  - Total Profit:  -173.7948776
Cluster:  1  - Building:  6  - round:  0
Building:  6  - Total Profit:  -596.3276914678621
Cluster:  2  - Building:  3  - round:  0
Building:  3  - Total Profit:  413.99348610369844
Cluster:  2  - Building:  4  - round:  0
Building:  4  - Total Profit:  252.5607129845912
Cluster:  2  - Building:  9  - round:  0
Building:  9  - Total Profit:  297.639130099

KeyboardInterrupt: 

In [11]:
# Display the per-run "Total Profit" values collected in result_df by the evaluation cell above
result_df["Total Profit"]

0     391.271085
1    -130.957165
2     114.282858
3    -156.315128
4    -158.256629
5    -297.691056
6    -173.794878
7    -596.327691
8     418.249709
9     251.295036
10    295.560140
11    388.206348
12    316.959420
13    400.539787
14    166.514324
15    326.726452
16    178.689987
17     11.579345
18    207.679142
19    306.928433
20    215.829310
21    237.994325
22    232.283552
23    191.929315
24    583.194516
25     47.613313
26    211.460812
27   -183.380170
28     57.571424
29    267.557714
Name: Total Profit, dtype: float64