In [1]:
# Imports

import logging
import os
logging.getLogger("wandb").setLevel(logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.ERROR)
os.environ['WANDB_SILENT'] = 'true'
os.environ['WANDB_CONSOLE'] = 'off'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np


import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from tf_agents.agents.ddpg import ddpg_agent
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import tf_py_environment, py_environment, batched_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.utils import common
import matplotlib.pyplot as plt
import wandb

import sys
sys.path.insert(0, '..')
from environments.EnergyManagementEnv import EnergyManagementEnv
from utils.agentNetworks import ActorNetwork, CriticNetwork, CustomLayers
import utils.dataloader as DL
from utils.federatedLearningHandler import *





In [40]:
# Also possible: Attention score: Rank based, soft-max ohne normalization 
def calculate_mean_weights_with_noise(weights_list, noise_scale=0.1):
    # Assuming weights_list is a list containing two elements, where each element
    # is a list of numpy arrays representing the weights of an actor network
    mean_weights = []

    for weight_pair in zip(*weights_list):
        print(weight_pair)
        #1. Averaging
        mean_weight = tf.math.reduce_mean(tf.convert_to_tensor(weight_pair), axis=0)
        
        #2. Adding noise
        noise = tf.random.normal(shape=mean_weight.shape, mean=0.0, stddev=noise_scale)
        noisy_mean_weight = mean_weight + noise

        mean_weights.append(noisy_mean_weight)
    return mean_weights


def calculate_mean_weights_with_softmax_attention(weights_list, performance_metrics, noise_scale=0.3):
    
    """# Check if weights_list only contains weights from one building
    if len(weights_list) == 1:
        # If so, simply return the weights with noise added
        single_building_weights = weights_list[0]
        noisy_single_building_weights = []
        for weight in single_building_weights:
            noise = tf.random.normal(shape=weight.shape, mean=0.0, stddev=noise_scale)
            noisy_single_building_weights.append(weight + noise)
        return noisy_single_building_weights
    """
    mean_weights = []
    #Claulate standardized attention scores
    performance_metrics_tensor = tf.convert_to_tensor(performance_metrics, dtype=tf.float32)
    standardized_metrics = (performance_metrics_tensor - tf.reduce_mean(performance_metrics_tensor)) / tf.math.reduce_std(performance_metrics_tensor)
    attention_scores = tf.nn.softmax(standardized_metrics)

    for weight_pair in zip(*weights_list):
        
        stacked_weights = tf.stack(weight_pair, axis=0)
        weighted_mean_weight = tf.zeros_like(stacked_weights[0])

        # Iterate through each model's weights and the corresponding attention score
        for model_weights, attention_score in zip(stacked_weights, attention_scores):
            
            weighted_model_weights = model_weights * attention_score
            
            # Add noise to the weighted model weights
            noise = tf.random.normal(shape=weighted_model_weights.shape, mean=0.0, stddev=noise_scale)
            noisy_weighted_model_weights = weighted_model_weights + noise
            
            # Accumulate the weighted (and noised) weights
            weighted_mean_weight += noisy_weighted_model_weights
        
        mean_weights.append(weighted_mean_weight)
    
    return mean_weights

In [59]:
#0. Reset global step
num_buildings = 30
energy_data = pd.read_csv("../../data/3final_data/Final_Energy_dataset.csv", header=0)
energy_data.set_index('Date', inplace=True)
energy_data.fillna(0, inplace=True)

dataset = {"train": {}, "eval": {}, "test": {}}
environments = {"train": {}, "eval": {}, "test": {}}
for idx in range(num_buildings):
    user_data = energy_data[[f'load_{idx+1}', f'pv_{idx+1}', 'price', 'fuelmix']]
    
    dataset["train"][f"building_{idx+1}"] = user_data[0:17520].set_index(pd.RangeIndex(0,17520))
    dataset["eval"][f"building_{idx+1}"] = user_data[17520:35088].set_index(pd.RangeIndex(0,17568))
    dataset["test"][f"building_{idx+1}"] = user_data[35088:52608].set_index(pd.RangeIndex(0,17520))

    environments["train"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["train"][f"building_{idx+1}"]))
    environments["eval"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["eval"][f"building_{idx+1}"]))
    environments["test"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["test"][f"building_{idx+1}"], logging=True))

print("Batch size: ", environments["train"][f"building_1"].batch_size)
print("State Space: {}, Action Space: {}".format(environments["train"][f"building_1"].observation_spec().shape[0], environments["train"][f"building_1"].action_spec().shape[0])) #SoE, price, price forecast 1-6
print("Upper bound: {}".format(round(environments["train"][f"building_1"].action_spec().maximum.item(), 3)))
dataset["test"][f"building_1"].head(1)

tf.compat.v1.reset_default_graph()
global_step = tf.compat.v1.train.get_or_create_global_step()

#1. Initalize local agent and set global weights
local_tf_agent, local_eval_policy, local_collect_policy = get_ddpg_agent(
    observation_spec = environments["train"][f"building_{1}"].observation_spec(),
    action_spec = environments["train"][f"building_{1}"].action_spec(),
    custom_layers = [CustomLayers.get_dense_layers(layers=1, units=32)],
    global_step = global_step,
    environments = environments,
    )

model_dir = os.path.join(os.getcwd(), f"models/cluster_{1}/FLround{3}_c{10}_WAAwN")
            
with np.load(os.path.join(model_dir, "actor_network_weights.npz"), allow_pickle=True) as data:
    # Extract the arrays using the keys corresponding to their order
    local_tf_agent._actor_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])

with np.load(os.path.join(model_dir, "critic_weights.npz"), allow_pickle=True) as data:
    # Extract the arrays using the keys corresponding to their order
    local_tf_agent._critic_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])

with np.load(os.path.join(model_dir, "target_actor_weights.npz"), allow_pickle=True) as data:
    # Extract the arrays using the keys corresponding to their order
    local_tf_agent._target_actor_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])

with np.load(os.path.join(model_dir, "target_critic_weights.npz"), allow_pickle=True) as data:
    # Extract the arrays using the keys corresponding to their order
    local_tf_agent._target_critic_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])

#2. Prepare training pipeline: Setup iterator, replay buffer, driver
local_iterator, local_collect_driver, local_time_step, local_policy_state = setup_rl_training_pipeline(
    local_tf_agent, environments["train"][f"building_{1}"], 
    20000, local_collect_policy, 2000, 20, 128)

local_actor_weight_list = []
local_actor_weight_list.append(local_tf_agent._actor_network.get_weights())
local_actor_weight_list.append(local_tf_agent._actor_network.get_weights())
local_actor_weight_list.append(local_tf_agent._actor_network.get_weights())

Batch size:  1
State Space: 6, Action Space: 1
Upper bound: 2.3


In [71]:
def federated_aggregation(weights_list, performance_metrics, noise_scale=0.03):
    
    #aggregated_weights = []

    #0. Normalize performance metric to get percentual importance
    performance_tensor = tf.convert_to_tensor(performance_metrics, dtype=tf.float32)
    performance_normed = performance_tensor / tf.reduce_sum(performance_tensor)


    #1. Align weights: Unpack weights_list and pair up the corresponding elements of each sublist
    for weight_pair in zip(*weights_list):
        
        #1. Average weights
        #mean_weight = tf.math.reduce_mean(tf.convert_to_tensor(weight_pair), axis=0)
        weight_tensor = tf.convert_to_tensor(weight_pair, dtype=tf.float32)
        mean_weight = tf.reduce_sum(weight_tensor * performance_normed[:, tf.newaxis, tf.newaxis], axis=0)

        #2. Add noise
        noise = tf.random.normal(shape=mean_weight.shape, mean=0.0, stddev=noise_scale)
        noisy_mean_weight = mean_weight + noise

        #aggregated_weights.append(noisy_mean_weight)
    return noisy_mean_weight

# Performe Federated aggregation
performance_metrics = list()
performance_metrics.append(10)
performance_metrics.append(10)
performance_metrics.append(10)
print("Performance List: ", performance_metrics)
average_actor_weights = federated_aggregation(local_actor_weight_list, performance_metrics)

Performance List:  [10, 10, 10]


In [73]:
len(local_actor_weight_list[0])
local_actor_weight_list

[[array([[-0.22865833, -0.4688779 ,  0.81825066, ...,  0.53074694,
           0.547844  , -0.3013528 ],
         [ 0.48773125, -0.22440106, -0.3552874 , ...,  0.36101028,
          -0.18742278,  0.361116  ],
         [ 0.41001034,  1.0742527 ,  2.4065573 , ..., -0.04344235,
           0.6784884 ,  0.30657816],
         [-2.257237  ,  1.3792734 , -0.22279078, ..., -0.6959624 ,
           1.0233685 ,  0.66327465],
         [ 0.9270111 ,  0.22591174, -0.18504748, ..., -0.5163007 ,
          -1.6459152 , -0.12282398],
         [ 0.6763817 , -1.5025165 ,  0.47156113, ...,  0.7523886 ,
          -0.9428756 , -0.8831225 ]], dtype=float32),
  array([-1.43404627e+00, -1.78873062e-01, -1.22440457e-01, -2.54565835e-01,
          9.71539855e-01, -1.50063372e+00, -1.26875013e-01,  3.10668826e-01,
         -2.60282183e+00, -1.78625524e+00, -4.46442008e-01, -9.45907354e-01,
          1.58965886e-01, -1.55798531e+00,  8.52170229e-01, -5.49731791e-01,
          8.54725480e-01, -1.53600764e+00,  1.03590

In [3]:
num_buildings = 30
energy_data = pd.read_csv("../../data/3final_data/Final_Energy_dataset.csv", header=0)
energy_data.set_index('Date', inplace=True)
energy_data.fillna(0, inplace=True)

dataset = {"train": {}, "eval": {}, "test": {}}
environments = {"train": {}, "eval": {}, "test": {}}
for idx in range(num_buildings):
    user_data = energy_data[[f'load_{idx+1}', f'pv_{idx+1}', 'price', 'fuelmix']]
    
    dataset["train"][f"building_{idx+1}"] = user_data[0:17520].set_index(pd.RangeIndex(0,17520))
    dataset["eval"][f"building_{idx+1}"] = user_data[17520:35088].set_index(pd.RangeIndex(0,17568))
    dataset["test"][f"building_{idx+1}"] = user_data[35088:52608].set_index(pd.RangeIndex(0,17520))

    environments["train"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["train"][f"building_{idx+1}"]))
    environments["eval"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["eval"][f"building_{idx+1}"]))
    environments["test"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["test"][f"building_{idx+1}"], logging=True))

print("Batch size: ", environments["train"][f"building_1"].batch_size)
print("State Space: {}, Action Space: {}".format(environments["train"][f"building_1"].observation_spec().shape[0], environments["train"][f"building_1"].action_spec().shape[0])) #SoE, price, price forecast 1-6
print("Upper bound: {}".format(round(environments["train"][f"building_1"].action_spec().maximum.item(), 3)))
dataset["test"][f"building_1"].head(1)

Batch size:  1
State Space: 6, Action Space: 1
Upper bound: 2.3


Unnamed: 0,load_1,pv_1,price,fuelmix
0,1.149,0.0,0.05704,0.530991


In [38]:
federated_rounds = 5
num_clusters = 18
# 2, 6, 10, 12, 14, 16, 18

y = np.loadtxt(f'../../data/3final_data/Clusters_KMeans_dtw_c{num_clusters}.csv', delimiter=',').astype(int)

cluster_buildings = {i: [] for i in range(num_clusters)}

# Iterate through each cluster
for cluster_number in range(num_clusters):
    buildings_in_cluster = np.where(y == cluster_number)[0] +1
    cluster_buildings[cluster_number] = buildings_in_cluster
cluster_buildings

{0: array([7], dtype=int64),
 1: array([17], dtype=int64),
 2: array([13, 19, 20], dtype=int64),
 3: array([18], dtype=int64),
 4: array([ 9, 30], dtype=int64),
 5: array([1], dtype=int64),
 6: array([6], dtype=int64),
 7: array([11], dtype=int64),
 8: array([23, 24, 26, 28, 29], dtype=int64),
 9: array([2], dtype=int64),
 10: array([8], dtype=int64),
 11: array([21], dtype=int64),
 12: array([ 3,  4, 14, 15, 22], dtype=int64),
 13: array([16], dtype=int64),
 14: array([10], dtype=int64),
 15: array([27], dtype=int64),
 16: array([25], dtype=int64),
 17: array([ 5, 12], dtype=int64)}

In [6]:
# Setup Agent networks
batch_size = 128
replay_buffer_capacity = 20000 #-> only <18.000 samples per dataset
initial_collect_steps = 2000
collect_steps_per_iteration = 20 
num_iterations = 10000 #10000
eval_interval = 9500 #3000

In [7]:
global_weights = {"actor_net": {}, "critic_net": {}, "target_actor_network": {}, "target_critic_network": {}}

#Initalize a global model for each Cluster of similar buildings
for cluster in range(num_clusters):
        # 1. Build global agent per cluster
        tf.compat.v1.reset_default_graph()
        global_step = tf.compat.v1.train.get_or_create_global_step()
        first_building_in_cluster = cluster_buildings[cluster][0]

        global_tf_agent, global_eval_policy, global_collect_policy = get_ddpg_agent(
                observation_spec = environments["train"][f"building_{first_building_in_cluster}"].observation_spec(),
                action_spec = environments["train"][f"building_{first_building_in_cluster}"].action_spec(),
                custom_layers = [CustomLayers.get_dense_layers(layers=1, units=32)],
                global_step = global_step,
                environments = environments,
                )

        # 2. Initially store weights
        global_weights["actor_net"][cluster] = global_tf_agent._actor_network.get_weights()
        global_weights["critic_net"][cluster] = global_tf_agent._critic_network.get_weights()
        global_weights["target_actor_network"][cluster] = global_tf_agent._target_actor_network.get_weights()
        global_weights["target_critic_network"][cluster] = global_tf_agent._target_critic_network.get_weights()

        model_dir = os.path.join(os.getcwd(), f"models/cluster_{cluster}/FLround{0}_c{num_clusters}_WAAwN_test")
        os.makedirs(model_dir, exist_ok=True)

        np.savez(os.path.join(model_dir, "actor_network_weights.npz"), *global_tf_agent._actor_network.get_weights())
        np.savez(os.path.join(model_dir, "critic_weights.npz"), *global_tf_agent._critic_network.get_weights())
        np.savez(os.path.join(model_dir, "target_actor_weights.npz"), *global_tf_agent._target_actor_network.get_weights())
        np.savez(os.path.join(model_dir, "target_critic_weights.npz"), *global_tf_agent._target_critic_network.get_weights())

In [11]:
#Start Federated Learning - For each federated round
for federated_round  in range(federated_rounds):
    
    #Iterate through each cluster
    for cluster_number, buildings_in_cluster in cluster_buildings.items():
        print(f"Cluster {cluster_number}: Buildings {buildings_in_cluster} Federated round ----------", federated_round+1, f"/ {federated_rounds}")
        
        local_actor_weight_list = list()
        local_critic_weight_list = list()
        local_target_actor_weight_list = list()
        local_target_critic_weight_list = list()

        performance_metrics = list()

        #Iterate through the buildings per cluster
        for building_index in buildings_in_cluster:
            
            #0. Reset global step
            tf.compat.v1.reset_default_graph()
            global_step = tf.compat.v1.train.get_or_create_global_step()
            
            #1. Initalize local agent and set global weights
            local_tf_agent, local_eval_policy, local_collect_policy = get_ddpg_agent(
                observation_spec = environments["train"][f"building_{building_index}"].observation_spec(),
                action_spec = environments["train"][f"building_{building_index}"].action_spec(),
                custom_layers = [CustomLayers.get_dense_layers(layers=1, units=32)],
                global_step = global_step,
                environments = environments,
                )
            
            model_dir = os.path.join(os.getcwd(), f"models/cluster_{cluster_number}/FLround{federated_round}_c{num_clusters}_WAAwN_test")
            
            with np.load(os.path.join(model_dir, "actor_network_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                local_tf_agent._actor_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "critic_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                local_tf_agent._critic_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "target_actor_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                local_tf_agent._target_actor_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "target_critic_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                local_tf_agent._target_critic_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])

            #2. Prepare training pipeline: Setup iterator, replay buffer, driver
            local_iterator, local_collect_driver, local_time_step, local_policy_state = setup_rl_training_pipeline(
                local_tf_agent, environments["train"][f"building_{building_index}"], 
                replay_buffer_capacity, local_collect_policy, initial_collect_steps, 
                collect_steps_per_iteration, batch_size
                )

            #3. Setup wandb logging
            #artifact = initialize_wandb_logging(name=f"Exp_building{building_index}_rd{federated_round+1}", num_iterations=num_iterations)

            #4. Start training
            #print(f"Start training building {building_index+1} - Round {federated_round+1}")
            
            eval_metrics = [tf_metrics.AverageReturnMetric()]
            test_metrics = [tf_metrics.AverageReturnMetric()]

            while global_step.numpy() < num_iterations:

                #if global_step.numpy() % 50 == 0:
                #    print(global_step.numpy(), "/ ", num_iterations)

                local_time_step, local_policy_state = local_collect_driver.run(time_step=local_time_step, policy_state=local_policy_state)
                local_experience, _ = next(local_iterator)
                local_train_loss = local_tf_agent.train(local_experience)
                
                """metrics = {}
                if global_step.numpy() % eval_interval == 0:
                    #train_checkpointer.save(global_step)
                    metrics = metric_utils.eager_compute(eval_metrics,environments["eval"][f"building_{building_index}"],
                        local_eval_policy,num_episodes=1,train_step=global_step,summary_writer=None,summary_prefix='',use_function=True)"""
                
                
                #performance_metrics.append()
                #if global_step.numpy() % 2 == 0:
                #    metrics["loss"] = local_train_loss.loss
                #    wandb.log(metrics)
            
            metrics = metric_utils.eager_compute(test_metrics,environments["eval"][f"building_{building_index}"], local_eval_policy, num_episodes=1)
            print("Return: ", metrics["AverageReturn"].numpy())
            performance_metrics.append(metrics["AverageReturn"].numpy())
            
            #5. Add local agent weights to list
            local_actor_weight_list.append(local_tf_agent._actor_network.get_weights())
            local_critic_weight_list.append(local_tf_agent._critic_network.get_weights())
            local_target_actor_weight_list.append(local_tf_agent._target_actor_network.get_weights())
            local_target_critic_weight_list.append(local_tf_agent._target_critic_network.get_weights())

        # Performe Federated aggregation
        print("Performance List: ", performance_metrics)
        average_actor_weights = federated_aggregation(local_actor_weight_list, performance_metrics)
        average_critic_weights = federated_aggregation(local_critic_weight_list, performance_metrics) 
        average_target_actor_weights = federated_aggregation(local_target_actor_weight_list, performance_metrics) 
        average_target_critic_weights = federated_aggregation(local_target_critic_weight_list, performance_metrics)    
        
        model_dir = os.path.join(os.getcwd(), f"models/cluster_{cluster_number}/FLround{federated_round+1}_c{num_clusters}_WAAwN_test")
        os.makedirs(model_dir, exist_ok=True)

        np.savez(os.path.join(model_dir, "actor_network_weights.npz"), *average_actor_weights)
        np.savez(os.path.join(model_dir, "critic_weights.npz"), *average_critic_weights)
        np.savez(os.path.join(model_dir, "target_actor_weights.npz"), *average_target_actor_weights)
        np.savez(os.path.join(model_dir, "target_critic_weights.npz"), *average_target_critic_weights)

Cluster 0: Buildings [7] Federated round ---------- 1 / 5
Return:  4947.773
Performance List:  [4947.773]
Cluster 1: Buildings [17] Federated round ---------- 1 / 5
Return:  4138.058
Performance List:  [4138.058]
Cluster 2: Buildings [13 19 20] Federated round ---------- 1 / 5
Return:  4982.4443
Return:  4877.625
Return:  4795.5405
Performance List:  [4982.4443, 4877.625, 4795.5405]
Cluster 3: Buildings [18] Federated round ---------- 1 / 5
Return:  4237.4927
Performance List:  [4237.4927]
Cluster 4: Buildings [ 9 30] Federated round ---------- 1 / 5
Return:  4459.204
Return:  4678.903
Performance List:  [4459.204, 4678.903]
Cluster 5: Buildings [1] Federated round ---------- 1 / 5
Return:  5415.0923
Performance List:  [5415.0923]
Cluster 6: Buildings [6] Federated round ---------- 1 / 5
Return:  4087.658
Performance List:  [4087.658]
Cluster 7: Buildings [11] Federated round ---------- 1 / 5
Return:  4493.451
Performance List:  [4493.451]
Cluster 8: Buildings [23 24 26 28 29] Federate

In [12]:
num_rounds=1
num_test_iterations = 10000 # Ab 12 clustern, vorher 5000

result_df = pd.DataFrame(columns=['Building', 'Total Profit'])

for cluster_number, buildings_in_cluster in cluster_buildings.items():

    for building_index in buildings_in_cluster:
        
        for round in range(num_rounds):
            print("Cluster: ", cluster_number, " - Building: ", building_index, " - round: ", round)
            
            #0. Reset global step
            tf.compat.v1.reset_default_graph()
            global_step = tf.compat.v1.train.get_or_create_global_step()

            #1. Initalize local agent and set trained global weights
            federated_tf_agent, federated_eval_policy, federated_collect_policy = get_ddpg_agent(
                observation_spec = environments["train"][f"building_{building_index}"].observation_spec(),
                action_spec = environments["train"][f"building_{building_index}"].action_spec(),
                custom_layers = [CustomLayers.get_dense_layers(layers=1, units=32)],
                global_step = global_step,
                environments = environments,
                )
            
            model_dir = os.path.join(os.getcwd(), f"models/cluster_{cluster_number}/FLround{3}_c{num_clusters}_WAAwN_test")
            with np.load(os.path.join(model_dir, "actor_network_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                federated_tf_agent._actor_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "critic_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                federated_tf_agent._critic_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "target_actor_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                federated_tf_agent._target_actor_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "target_critic_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                federated_tf_agent._target_critic_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            #Setup iterator, replay buffer, driver
            iterator, collect_driver, time_step, policy_state = setup_rl_training_pipeline(
                federated_tf_agent, environments["train"][f"building_{building_index}"], 
                replay_buffer_capacity, federated_collect_policy, initial_collect_steps, 
                collect_steps_per_iteration, batch_size
                )
            
            #Setup wandb logging
            artifact = initialize_wandb_logging(name=f"Exp_building{building_index}_rd{round}", num_iterations=num_iterations)
            
            #2. Train and evaluate
            eval_metrics = [tf_metrics.AverageReturnMetric()]
            test_metrics = [tf_metrics.AverageReturnMetric()]

            while global_step.numpy() < num_test_iterations:

                #if global_step.numpy() % 50 == 0:
                #    print(global_step.numpy(), "/ ", num_iterations)

                time_step, policy_state = collect_driver.run(time_step=time_step, policy_state=policy_state)
                experience, _ = next(iterator)
                train_loss = federated_tf_agent.train(experience)
                                
                metrics = {}
                if global_step.numpy() % eval_interval == 0:
                    #train_checkpointer.save(global_step)
                    metrics = metric_utils.eager_compute(eval_metrics,environments["eval"][f"building_{building_index}"],
                        federated_eval_policy,num_episodes=1,train_step=global_step,summary_writer=None,summary_prefix='',use_function=True)
                
                if global_step.numpy() % 2 == 0:
                    metrics["loss"] = train_loss.loss
                    wandb.log(metrics)

            #3. Start testing
            metrics = metric_utils.eager_compute(test_metrics,environments["test"][f"building_{building_index}"], federated_eval_policy, num_episodes=1)
            print('Building: ', building_index, ' - Total Profit: ', wandb.summary["Final Profit"])
            result_df = pd.concat([result_df, pd.DataFrame({'Building': [building_index], 'Total Profit': [wandb.summary["Final Profit"]]})], ignore_index=True)
            wandb.log(metrics)
            #artifact.add_dir(local_path='checkpoints/ddpg/')
            wandb.log_artifact(artifact)
            wandb.finish()

Cluster:  0  - Building:  7  - round:  0
Building:  7  - Total Profit:  -32.98025289999993
Cluster:  1  - Building:  17  - round:  0
Building:  17  - Total Profit:  -368.7726228178648
Cluster:  2  - Building:  13  - round:  0
Building:  13  - Total Profit:  -40.75914236999963
Cluster:  2  - Building:  19  - round:  0
Building:  19  - Total Profit:  -30.16532673
Cluster:  2  - Building:  20  - round:  0
Building:  20  - Total Profit:  -256.3630751799992
Cluster:  3  - Building:  18  - round:  0
Building:  18  - Total Profit:  -311.63190151000015
Cluster:  4  - Building:  9  - round:  0
Building:  9  - Total Profit:  -134.22545635999936
Cluster:  4  - Building:  30  - round:  0
Building:  30  - Total Profit:  -101.18343558999989
Cluster:  5  - Building:  1  - round:  0
Building:  1  - Total Profit:  -25.610377677865184
Cluster:  6  - Building:  6  - round:  0
Building:  6  - Total Profit:  -596.944248857862
Cluster:  7  - Building:  11  - round:  0
Building:  11  - Total Profit:  -193.07

In [13]:
# Rename column
result_df.rename(columns={'Total Profit': 'Profit'}, inplace=True)
result_df['Setup'] = f'cluster{num_clusters}'
result_df.index.name = 'Building_nr'
result_df.reset_index(inplace=True, drop=True)

In [14]:
result_df.to_csv(f'results_clusters{num_clusters}_df.csv', index=False)
print("Final reuslt - clusters ", num_clusters, ": ", result_df["Profit"].sum())

Final reuslt - clusters  18 :  -5039.646180114189


In [15]:
print("Cluster 2: ", pd.read_csv("results_clusters2_df.csv")["Profit"].sum())
print("Cluster 6: ", pd.read_csv("results_clusters6_df.csv")["Profit"].sum())
print("Cluster 10: ", pd.read_csv("results_clusters10_df.csv")["Profit"].sum())
print("Cluster 12: ", pd.read_csv("results_clusters12_df.csv")["Profit"].sum())
print("Cluster 14: ", pd.read_csv("results_clusters14_df.csv")["Profit"].sum())

Cluster 2:  -5884.21237239078
Cluster 6:  -5892.741336364493
Cluster 10:  -5469.377397423285
Cluster 12:  -5469.109807054782
Cluster 14:  -5053.687516458025
