In [1]:
# Imports

import logging
import os
logging.getLogger("wandb").setLevel(logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.ERROR)
os.environ['WANDB_SILENT'] = 'true'
os.environ['WANDB_CONSOLE'] = 'off'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np


import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
#from tf_agents.agents.ddpg import ddpg_agent
#from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import tf_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
#from tf_agents.replay_buffers import tf_uniform_replay_buffer
#from tf_agents.utils import common
import matplotlib.pyplot as plt
import wandb

import sys
sys.path.insert(0, '..')
from environments.EnergyManagementEnv import EnergyManagementEnv
from utils.agentNetworks import ActorNetwork, CriticNetwork, CustomLayers
#import utils.dataloader as DL
from utils.federatedLearningHandler import *
from utils.federatedAggregation import FederatedAggregation





In [2]:
#Create Environments

num_buildings = 30
energy_data = pd.read_csv("../../data/3final_data/Final_Energy_dataset.csv", header=0)
energy_data.set_index('Date', inplace=True)
energy_data.fillna(0, inplace=True)

dataset = {"train": {}, "eval": {}, "test": {}}
environments = {"train": {}, "eval": {}, "test": {}}
for idx in range(num_buildings):
    user_data = energy_data[[f'load_{idx+1}', f'pv_{idx+1}', 'price', 'fuelmix']]
    
    dataset["train"][f"building_{idx+1}"] = user_data[0:17520].set_index(pd.RangeIndex(0,17520))
    dataset["eval"][f"building_{idx+1}"] = user_data[17520:35088].set_index(pd.RangeIndex(0,17568))
    dataset["test"][f"building_{idx+1}"] = user_data[35088:52608].set_index(pd.RangeIndex(0,17520))

    environments["train"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["train"][f"building_{idx+1}"]))
    environments["eval"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["eval"][f"building_{idx+1}"]))
    environments["test"][f"building_{idx+1}"] = tf_py_environment.TFPyEnvironment(EnergyManagementEnv(init_charge=0.0, data=dataset["test"][f"building_{idx+1}"], logging=True))

print("Batch size: ", environments["train"][f"building_1"].batch_size)
print("State Space: {}, Action Space: {}".format(environments["train"][f"building_1"].observation_spec().shape[0], environments["train"][f"building_1"].action_spec().shape[0])) #SoE, price, price forecast 1-6
print("Upper bound: {}".format(round(environments["train"][f"building_1"].action_spec().maximum.item(), 3)))
dataset["test"][f"building_1"].head(1)

Batch size:  1
State Space: 6, Action Space: 1
Upper bound: 2.3


Unnamed: 0,load_1,pv_1,price,fuelmix
0,1.149,0.0,0.05704,0.530991


In [3]:
# Clustering
num_clusters = 10 # 2, 6, 10, 12, 14, 16, 18

# Setup Agent networks
federated_rounds = 5
batch_size = 128
replay_buffer_capacity = 20000 #-> only <18.000 samples per dataset
initial_collect_steps = 2000
collect_steps_per_iteration = 20 
num_iterations = 10000 #10000
eval_interval = 9500 #3000


y = np.loadtxt(f'../../data/3final_data/Clusters_KMeans_dtw_c{num_clusters}.csv', delimiter=',').astype(int)
cluster_buildings = {i: [] for i in range(num_clusters)}

# Iterate through each cluster
for cluster_number in range(num_clusters):
    buildings_in_cluster = np.where(y == cluster_number)[0] +1
    cluster_buildings[cluster_number] = buildings_in_cluster
cluster_buildings

{0: array([7], dtype=int64),
 1: array([16, 17, 21], dtype=int64),
 2: array([13, 19, 20], dtype=int64),
 3: array([18], dtype=int64),
 4: array([ 3,  4,  9, 14, 15, 22, 30], dtype=int64),
 5: array([1], dtype=int64),
 6: array([6, 8], dtype=int64),
 7: array([11], dtype=int64),
 8: array([ 5, 12, 23, 24, 25, 26, 27, 28, 29], dtype=int64),
 9: array([ 2, 10], dtype=int64)}

In [4]:
# Initial Federated Learning Round -> Setup Global models per cluster
global_weights = {"actor_net": {}, "critic_net": {}, "target_actor_network": {}, "target_critic_network": {}}

#Initalize a global model for each Cluster of similar buildings
for cluster in range(num_clusters):
        # 1. Build global agent per cluster
        tf.compat.v1.reset_default_graph()
        global_step = tf.compat.v1.train.get_or_create_global_step()
        first_building_in_cluster = cluster_buildings[cluster][0]

        global_tf_agent, global_eval_policy, global_collect_policy = get_ddpg_agent(
                observation_spec = environments["train"][f"building_{first_building_in_cluster}"].observation_spec(),
                action_spec = environments["train"][f"building_{first_building_in_cluster}"].action_spec(),
                custom_layers = [CustomLayers.get_dense_layers(layers=1, units=32)],
                global_step = global_step,
                environments = environments,
                )

        # 2. Initially store weights
        global_weights["actor_net"][cluster] = global_tf_agent._actor_network.get_weights()
        global_weights["critic_net"][cluster] = global_tf_agent._critic_network.get_weights()
        global_weights["target_actor_network"][cluster] = global_tf_agent._target_actor_network.get_weights()
        global_weights["target_critic_network"][cluster] = global_tf_agent._target_critic_network.get_weights()

        model_dir = os.path.join(os.getcwd(), f"models/cluster_{cluster}/FLround{0}_c{num_clusters}_AvgAgg")
        os.makedirs(model_dir, exist_ok=True)

        np.savez(os.path.join(model_dir, "actor_network_weights.npz"), *global_tf_agent._actor_network.get_weights())
        np.savez(os.path.join(model_dir, "critic_weights.npz"), *global_tf_agent._critic_network.get_weights())
        np.savez(os.path.join(model_dir, "target_actor_weights.npz"), *global_tf_agent._target_actor_network.get_weights())
        np.savez(os.path.join(model_dir, "target_critic_weights.npz"), *global_tf_agent._target_critic_network.get_weights())

In [5]:
#Start Federated Learning - For each federated round
for federated_round  in range(federated_rounds):
    
    #Iterate through each cluster
    for cluster_number, buildings_in_cluster in cluster_buildings.items():
        print(f"Cluster {cluster_number}: Buildings {buildings_in_cluster} Federated round ----------", federated_round+1, f"/ {federated_rounds}")
        
        local_actor_weight_list = list()
        local_critic_weight_list = list()
        local_target_actor_weight_list = list()
        local_target_critic_weight_list = list()

        performance_metrics = list()

        #Iterate through the buildings per cluster
        for building_index in buildings_in_cluster:
            
            #0. Reset global step
            tf.compat.v1.reset_default_graph()
            global_step = tf.compat.v1.train.get_or_create_global_step()
            
            #1. Initalize local agent and set global weights
            local_tf_agent, local_eval_policy, local_collect_policy = get_ddpg_agent(
                observation_spec = environments["train"][f"building_{building_index}"].observation_spec(),
                action_spec = environments["train"][f"building_{building_index}"].action_spec(),
                custom_layers = [CustomLayers.get_dense_layers(layers=1, units=32)],
                global_step = global_step,
                environments = environments,
                )
            
            model_dir = os.path.join(os.getcwd(), f"models/cluster_{cluster_number}/FLround{federated_round}_c{num_clusters}_AvgAgg")
            
            with np.load(os.path.join(model_dir, "actor_network_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                local_tf_agent._actor_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "critic_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                local_tf_agent._critic_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "target_actor_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                local_tf_agent._target_actor_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "target_critic_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                local_tf_agent._target_critic_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])

            #2. Prepare training pipeline: Setup iterator, replay buffer, driver
            local_iterator, local_collect_driver, local_time_step, local_policy_state = setup_rl_training_pipeline(
                local_tf_agent, environments["train"][f"building_{building_index}"], 
                replay_buffer_capacity, local_collect_policy, initial_collect_steps, 
                collect_steps_per_iteration, batch_size
                )

            #3. Setup wandb logging
            #artifact = initialize_wandb_logging(name=f"Exp_building{building_index}_rd{federated_round+1}", num_iterations=num_iterations)

            #4. Start training
            #print(f"Start training building {building_index+1} - Round {federated_round+1}")
            
            eval_metrics = [tf_metrics.AverageReturnMetric()]
            test_metrics = [tf_metrics.AverageReturnMetric()]

            while global_step.numpy() < num_iterations:

                #if global_step.numpy() % 50 == 0:
                #    print(global_step.numpy(), "/ ", num_iterations)

                local_time_step, local_policy_state = local_collect_driver.run(time_step=local_time_step, policy_state=local_policy_state)
                local_experience, _ = next(local_iterator)
                local_train_loss = local_tf_agent.train(local_experience)
                
                """metrics = {}
                if global_step.numpy() % eval_interval == 0:
                    #train_checkpointer.save(global_step)
                    metrics = metric_utils.eager_compute(eval_metrics,environments["eval"][f"building_{building_index}"],
                        local_eval_policy,num_episodes=1,train_step=global_step,summary_writer=None,summary_prefix='',use_function=True)"""
                
                
                #performance_metrics.append()
                #if global_step.numpy() % 2 == 0:
                #    metrics["loss"] = local_train_loss.loss
                #    wandb.log(metrics)
            
            metrics = metric_utils.eager_compute(test_metrics,environments["eval"][f"building_{building_index}"], 
                                                 local_eval_policy, num_episodes=1)
            print("Return: ", metrics["AverageReturn"].numpy())
            performance_metrics.append(metrics["AverageReturn"].numpy())
            
            #5. Add local agent weights to list
            local_actor_weight_list.append(local_tf_agent._actor_network.get_weights())
            local_critic_weight_list.append(local_tf_agent._critic_network.get_weights())
            local_target_actor_weight_list.append(local_tf_agent._target_actor_network.get_weights())
            local_target_critic_weight_list.append(local_tf_agent._target_critic_network.get_weights())

        # Performe Federated aggregation
        print("Performance List: ", performance_metrics)
        average_actor_weights = FederatedAggregation.federated_weigthed_aggregation(local_actor_weight_list, performance_metrics)
        average_critic_weights = FederatedAggregation.federated_weigthed_aggregation(local_critic_weight_list, performance_metrics) 
        average_target_actor_weights = FederatedAggregation.federated_weigthed_aggregation(local_target_actor_weight_list, performance_metrics) 
        average_target_critic_weights = FederatedAggregation.federated_weigthed_aggregation(local_target_critic_weight_list, performance_metrics)    
        
        model_dir = os.path.join(os.getcwd(), f"models/cluster_{cluster_number}/FLround{federated_round+1}_c{num_clusters}_AvgAgg")
        os.makedirs(model_dir, exist_ok=True)

        np.savez(os.path.join(model_dir, "actor_network_weights.npz"), *average_actor_weights)
        np.savez(os.path.join(model_dir, "critic_weights.npz"), *average_critic_weights)
        np.savez(os.path.join(model_dir, "target_actor_weights.npz"), *average_target_actor_weights)
        np.savez(os.path.join(model_dir, "target_critic_weights.npz"), *average_target_critic_weights)

Cluster 0: Buildings [7] Federated round ---------- 1 / 5
Return:  4950.34
Performance List:  [4950.34]
Cluster 1: Buildings [16 17 21] Federated round ---------- 1 / 5
Return:  4313.71
Return:  4168.073
Return:  3503.0078
Performance List:  [4313.71, 4168.073, 3503.0078]
Cluster 2: Buildings [13 19 20] Federated round ---------- 1 / 5
Return:  4940.148
Return:  4798.8726
Return:  4680.468
Performance List:  [4940.148, 4798.8726, 4680.468]
Cluster 3: Buildings [18] Federated round ---------- 1 / 5
Return:  4224.5947
Performance List:  [4224.5947]
Cluster 4: Buildings [ 3  4  9 14 15 22 30] Federated round ---------- 1 / 5
Return:  4701.213
Return:  4386.02
Return:  4458.8794
Return:  4852.931
Return:  4613.528
Return:  4428.125
Return:  4630.5806
Performance List:  [4701.213, 4386.02, 4458.8794, 4852.931, 4613.528, 4428.125, 4630.5806]
Cluster 5: Buildings [1] Federated round ---------- 1 / 5
Return:  5434.9473
Performance List:  [5434.9473]
Cluster 6: Buildings [6 8] Federated round -

ValueError: Layer ActorNetwork weight shape (256,) is not compatible with provided weight shape (1, 256).

In [None]:
num_rounds=1
num_test_iterations = 8000 # Ab 12 clustern, vorher 5000

result_df = pd.DataFrame(columns=['Building', 'Total Profit'])

for cluster_number, buildings_in_cluster in cluster_buildings.items():

    for building_index in buildings_in_cluster:
        
        for round in range(num_rounds):
            print("Cluster: ", cluster_number, " - Building: ", building_index, " - round: ", round)
            
            #0. Reset global step
            tf.compat.v1.reset_default_graph()
            global_step = tf.compat.v1.train.get_or_create_global_step()

            #1. Initalize local agent and set trained global weights
            federated_tf_agent, federated_eval_policy, federated_collect_policy = get_ddpg_agent(
                observation_spec = environments["train"][f"building_{building_index}"].observation_spec(),
                action_spec = environments["train"][f"building_{building_index}"].action_spec(),
                custom_layers = [CustomLayers.get_dense_layers(layers=1, units=32)],
                global_step = global_step,
                environments = environments,
                )
            
            model_dir = os.path.join(os.getcwd(), f"models/cluster_{cluster_number}/FLround{3}_c{num_clusters}_AvgAgg")
            with np.load(os.path.join(model_dir, "actor_network_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                federated_tf_agent._actor_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "critic_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                federated_tf_agent._critic_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "target_actor_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                federated_tf_agent._target_actor_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            with np.load(os.path.join(model_dir, "target_critic_weights.npz"), allow_pickle=True) as data:
                # Extract the arrays using the keys corresponding to their order
                federated_tf_agent._target_critic_network.set_weights([data[f'arr_{i}'] for i in range(len(data.files))])
            
            #Setup iterator, replay buffer, driver
            iterator, collect_driver, time_step, policy_state = setup_rl_training_pipeline(
                federated_tf_agent, environments["train"][f"building_{building_index}"], 
                replay_buffer_capacity, federated_collect_policy, initial_collect_steps, 
                collect_steps_per_iteration, batch_size
                )
            
            #Setup wandb logging
            artifact = initialize_wandb_logging(name=f"Exp_building{building_index}_rd{round}", num_iterations=num_iterations)
            
            #2. Train and evaluate
            eval_metrics = [tf_metrics.AverageReturnMetric()]
            test_metrics = [tf_metrics.AverageReturnMetric()]

            while global_step.numpy() < num_test_iterations:

                #if global_step.numpy() % 50 == 0:
                #    print(global_step.numpy(), "/ ", num_iterations)

                time_step, policy_state = collect_driver.run(time_step=time_step, policy_state=policy_state)
                experience, _ = next(iterator)
                train_loss = federated_tf_agent.train(experience)
                                
                metrics = {}
                if global_step.numpy() % eval_interval == 0:
                    #train_checkpointer.save(global_step)
                    metrics = metric_utils.eager_compute(eval_metrics,environments["eval"][f"building_{building_index}"],
                        federated_eval_policy,num_episodes=1,train_step=global_step,summary_writer=None,summary_prefix='',use_function=True)
                
                if global_step.numpy() % 2 == 0:
                    metrics["loss"] = train_loss.loss
                    wandb.log(metrics)

            #3. Start testing
            metrics = metric_utils.eager_compute(test_metrics,environments["test"][f"building_{building_index}"], federated_eval_policy, num_episodes=1)
            print('Building: ', building_index, ' - Total Profit: ', wandb.summary["Final Profit"])
            result_df = pd.concat([result_df, pd.DataFrame({'Building': [building_index], 'Total Profit': [wandb.summary["Final Profit"]]})], ignore_index=True)
            wandb.log(metrics)
            #artifact.add_dir(local_path='checkpoints/ddpg/')
            wandb.log_artifact(artifact)
            wandb.finish()

Cluster:  0  - Building:  7  - round:  0
Building:  7  - Total Profit:  -32.98025289999993
Cluster:  1  - Building:  17  - round:  0
Building:  17  - Total Profit:  -368.7726228178648
Cluster:  2  - Building:  13  - round:  0
Building:  13  - Total Profit:  -40.75914236999963
Cluster:  2  - Building:  19  - round:  0
Building:  19  - Total Profit:  -30.16532673
Cluster:  2  - Building:  20  - round:  0
Building:  20  - Total Profit:  -256.3630751799992
Cluster:  3  - Building:  18  - round:  0
Building:  18  - Total Profit:  -311.63190151000015
Cluster:  4  - Building:  9  - round:  0
Building:  9  - Total Profit:  -134.22545635999936
Cluster:  4  - Building:  30  - round:  0
Building:  30  - Total Profit:  -101.18343558999989
Cluster:  5  - Building:  1  - round:  0
Building:  1  - Total Profit:  -25.610377677865184
Cluster:  6  - Building:  6  - round:  0
Building:  6  - Total Profit:  -596.944248857862
Cluster:  7  - Building:  11  - round:  0
Building:  11  - Total Profit:  -193.07

Final reuslt - clusters  18 :  -5039.646180114189


In [None]:
print("Cluster 2: ", pd.read_csv("results_clusters2_df.csv")["Profit"].sum())
print("Cluster 6: ", pd.read_csv("results_clusters6_df.csv")["Profit"].sum())
print("Cluster 10: ", pd.read_csv("results_clusters10_df.csv")["Profit"].sum())
print("Cluster 12: ", pd.read_csv("results_clusters12_df.csv")["Profit"].sum())
print("Cluster 14: ", pd.read_csv("results_clusters14_df.csv")["Profit"].sum())

Cluster 2:  -5884.21237239078
Cluster 6:  -5892.741336364493
Cluster 10:  -5469.377397423285
Cluster 12:  -5469.109807054782
Cluster 14:  -5053.687516458025
