In [1]:
#Imports
import os
import tensorflow as tf

import sys
sys.path.insert(0, '..')
from utils.federatedAggregation import FederatedAggregation
from utils.reinforcementLearningHelper import *
from utils.federatedLearningHelper import *







In [2]:
#Setup Environments of selected buildings for training, evaluation, and testing

environments, observation_spec, action_spec  = setup_energymanagement_environments(num_buildings=30)

#Check environment setup
print(
    "Batch size:", environments["train"][f"building_1"].batch_size, 
    "/ State Space: {} / Action Space: {}".format(observation_spec.shape[0], action_spec.shape[0]),
    "/ Upper bound: {}".format(round(environments["train"][f"building_1"].action_spec().maximum.item(), 3)),
)

Batch size: 1 / State Space: 22 / Action Space: 1 / Upper bound: 2.3


In [3]:
def prosumption_clustered_buildings(num_clusters=10):

    # Catch non-clustered cluster sizes
    if num_clusters < 2 or num_clusters > 20:
        print("Currently, clustering has been done from cluster sizes within the range of 2 to 20.")
        return

    with open(f'../../data/3final_data/cluster_labels.pkl', 'rb') as file:
        cluster_data = pickle.load(file)

    # Retrieve cluster data from the provided dictionary
    cluster_data = cluster_data[num_clusters]

    # Iterate through each cluster
    clustered_buildings = {i: [] for i in range(num_clusters)}
    for cluster_number in range(num_clusters):
        # Find indices of buildings in the current cluster
        buildings_in_cluster = np.where(cluster_data == cluster_number)[0] + 1
        clustered_buildings[cluster_number] = buildings_in_cluster

    return clustered_buildings

In [4]:
# Cluster similar buildings (K-Means with DTW)

#Set parameter
num_clusters = 8 # 2, 6, 10, 12, 14, 16, 18

clustered_buildings = prosumption_clustered_buildings(num_clusters)
clustered_buildings

{0: array([7], dtype=int64),
 1: array([16, 19, 24], dtype=int64),
 2: array([ 1, 11, 12, 27], dtype=int64),
 3: array([ 2,  4,  6,  9, 10, 14, 15, 18, 22, 25, 30], dtype=int64),
 4: array([20, 26, 28], dtype=int64),
 5: array([13, 17, 21, 23, 29], dtype=int64),
 6: array([8], dtype=int64),
 7: array([3, 5], dtype=int64)}

In [5]:
# Setup Agent networks
federated_rounds = 20
batch_size = 128
replay_buffer_capacity = 20000 #-> only <18.000 samples per dataset
initial_collect_steps = 2000 #2000
collect_steps_per_iteration = 30 
num_iterations = 10000 #10000
eval_interval = num_iterations - 10 #9999

In [6]:
# FEDERATED LEARNING - Initalization Round 0

tf.compat.v1.reset_default_graph()
global_step = tf.compat.v1.train.get_or_create_global_step()

#Initalize a global model for each Cluster of similar buildings
for cluster in range(num_clusters):
        
        # 1. Build global agent per cluster
        first_building_in_cluster = clustered_buildings[cluster][0]
        global_sac_agent, global_eval_policy, global_collect_policy = initialize_sac_agent(
                observation_spec=observation_spec, action_spec=action_spec, global_step=global_step, environments=environments,
                )

        # 2. Initially store weights
        model_dir = os.path.join(os.getcwd(), f"models/sac/cluster_{cluster}/FLround{0}_c{num_clusters}_AvgAgg")
        os.makedirs(model_dir, exist_ok=True)
        
        save_sac_weights(global_sac_agent, model_dir)




In [7]:
#FEDERATED LEARNING - Model training for multiple Rounds

#For each federated round and cluster
for federated_round  in range(federated_rounds):
    for cluster_number, buildings_in_cluster in clustered_buildings.items():

        #Iterate through the buildings per cluster
        print(f"Cluster {cluster_number}: Buildings {buildings_in_cluster} Federated round ---", federated_round+1, f"/ {federated_rounds}")
        local_storage = {
            "actor_weights": [], "critic_weights_1": [], "critic_weights_2": [], "performance_metrics": []
            }
        
        for building_index in buildings_in_cluster:
            
            #0. Reset global step
            tf.compat.v1.reset_default_graph()
            global_step = tf.compat.v1.train.get_or_create_global_step()
            
            #1. Initalize local agent
            local_sac_agent, local_eval_policy, local_collect_policy = initialize_sac_agent(
                observation_spec = observation_spec, action_spec = action_spec,
                global_step = global_step, environments = environments,
                )
            
            #2. Set global weights of this training round to agent (loads the weights of last training)
            model_dir = os.path.join(os.getcwd(), f"models/sac/cluster_{cluster_number}/FLround{federated_round}_c{num_clusters}_AvgAgg")
            local_sac_agent = set_weights_to_sac_agent(local_sac_agent, model_dir)
                        
            #3. Prepare training pipeline: Setup iterator, replay buffer, driver
            local_iterator, local_collect_driver, local_time_step, local_policy_state = setup_rl_training_pipeline(
                local_sac_agent, environments["train"][f"building_{building_index}"], replay_buffer_capacity,
                local_collect_policy, initial_collect_steps, collect_steps_per_iteration, batch_size
                )
            
            #4. Train, evaluate agent and store weights
            local_sac_agent, local_storage = local_agent_training_and_evaluation(
                local_iterator, local_collect_driver, local_time_step, local_policy_state, global_step, 
                local_sac_agent, local_eval_policy, local_storage, building_index, num_iterations, environments, agent_type="sac"
                )           

        # Performe Federated aggregation
        average_actor_weights = FederatedAggregation.federated_weigthed_aggregation(local_storage["actor_weights"], local_storage["performance_metrics"])
        average_critic_weights_1 = FederatedAggregation.federated_weigthed_aggregation(local_storage["critic_weights_1"], local_storage["performance_metrics"]) 
        average_critic_weights_2 = FederatedAggregation.federated_weigthed_aggregation(local_storage["critic_weights_2"], local_storage["performance_metrics"]) 
         
        
        #Save federated weights for next round (Round + 1)
        model_dir = os.path.join(os.getcwd(), f"models/sac/cluster_{cluster_number}/FLround{federated_round+1}_c{num_clusters}_AvgAgg")
        os.makedirs(model_dir, exist_ok=True)
        save_federated_sac_weights(model_dir, average_actor_weights, average_critic_weights_1, average_critic_weights_2)

Cluster 0: Buildings [7] Federated round --- 1 / 20
Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.
Return:  -5222.7183
Cluster 1: Buildings [16 19 24] Federated round --- 1 / 20
Return:  -9429.145
Return:  -2512.1313
Return:  -3058.525
Cluster 2: Buildings [ 1 11 12 27] Federated round --- 1 / 20
Return:  -5842.421
Return:  -6157.3926
Return:  -5833.8384
Return:  -6602.2607
Cluster 3: Buildings [ 2  4  6  9 10 14 15 18 22 25 30] Federated round --- 1 / 20
Return:  -8577.111
Return:  -5417.4683
Return:  -12159.062
Return:  -5398.028
Return:  -7220.0005
Return:  -4448.565
Return:  -3585.9705
Return:  -8155.7627
Return:  -4679.426
Return:  -7018.429
Return:  -3824.3472
Cluster 4: Buildings [20 26 28] Federated round --- 1 / 20
Return:  -4562.9116
Return:  -7955.6807
Return:  -3830.0774
Cluster 5: Buildings [13 17 21 23 29] Federated round --- 1 / 20
Return:  -4240.0615
Return:  -10672.494
Return:  -11616.864
Return:  -5508.7666
Return:  -5709.633

In [8]:
# LOCAL REFITTING AND EVALUATION

best_federated_round = 20
num_rounds=3
num_test_iterations = 1000

result_df = pd.DataFrame(columns=['Building', 'Total Profit'])

for cluster_number, buildings_in_cluster in clustered_buildings.items():
    for building_index in buildings_in_cluster:
        
        for round in range(num_rounds):
            print("Cluster: ", cluster_number, " - Building: ", building_index, " - round: ", round)
            
            #0. Reset global step
            tf.compat.v1.reset_default_graph()
            global_step = tf.compat.v1.train.get_or_create_global_step()

            #1. Initalize local agent
            tf_sac_agent, eval_policy, collect_policy = initialize_sac_agent(
                observation_spec = observation_spec, action_spec = action_spec,
                global_step = global_step, environments = environments,
                )
            
            #2. Set global weights of this training round to agent (loads the weights of last training)
            model_dir = os.path.join(os.getcwd(), f"models/sac/cluster_{cluster_number}/FLround{best_federated_round}_c{num_clusters}_AvgAgg")
            tf_sac_agent = set_weights_to_sac_agent(tf_sac_agent, model_dir)

            #3. Prepare training pipeline: Setup iterator, replay buffer, driver
            iterator, collect_driver, time_step, policy_state = setup_rl_training_pipeline(
                tf_sac_agent, environments["train"][f"building_{building_index}"], replay_buffer_capacity,
                collect_policy, initial_collect_steps, collect_steps_per_iteration, batch_size
                )
            
            #4. Setup wandb logging
            artifact = initialize_wandb_logging(name=f"Exp_building{building_index}_rd{round}", num_iterations=num_iterations)
            
            #5. Train, evaluate agent and store weights
            result_df, metrics = agent_training_and_evaluation(global_step, num_test_iterations, collect_driver, 
                time_step, policy_state, iterator, tf_sac_agent, eval_policy, building_index, result_df, eval_interval, environments)
            
            #6. End and log wandb
            end_and_log_wandb(metrics, artifact)

Cluster:  0  - Building:  7  - round:  0


  result_df = pd.concat([result_df, pd.DataFrame({'Building': [building_index], 'Total Profit': [wandb.summary["Final Profit"]]})], ignore_index=True)


Building:  7  - Total Profit:  -1043.3022211881632
Cluster:  0  - Building:  7  - round:  1
Building:  7  - Total Profit:  -1037.3336460740295
Cluster:  0  - Building:  7  - round:  2
Building:  7  - Total Profit:  -1088.1328369545465
Cluster:  1  - Building:  16  - round:  0
Building:  16  - Total Profit:  -1342.7109289708567
Cluster:  1  - Building:  16  - round:  1
Building:  16  - Total Profit:  -1322.7196662456945
Cluster:  1  - Building:  16  - round:  2
Building:  16  - Total Profit:  -1382.8312596863748
Cluster:  1  - Building:  19  - round:  0
Building:  19  - Total Profit:  -655.8248149748878
Cluster:  1  - Building:  19  - round:  1
Building:  19  - Total Profit:  -660.1114214742954
Cluster:  1  - Building:  19  - round:  2
Building:  19  - Total Profit:  -647.6421464732367
Cluster:  1  - Building:  24  - round:  0
Building:  24  - Total Profit:  -683.9074165094362
Cluster:  1  - Building:  24  - round:  1
Building:  24  - Total Profit:  -682.9660454462313
Cluster:  1  - Bui

In [9]:
# Save results
result_df.rename(columns={'Total Profit': 'Profit'}, inplace=True)
result_df['Setup'] = 'TD3_LL'
result_df.index.name = 'Building_nr'
result_df.reset_index(inplace=True, drop=True)
os.makedirs('results', exist_ok=True)
result_df.to_csv(f'results/SAC_FL_results_setting_A_r20.csv', index=False)
print("Final reuslt: ", result_df["Profit"].sum())

Final reuslt:  -125237.74219500861


In [10]:
import requests

def send_telegram_message(bot_token, chat_id, message):
    """Send a message to a Telegram chat via the Bot API."""
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    payload = {
        "chat_id": chat_id,
        "text": message,
        "parse_mode": "Markdown"
    }
    response = requests.post(url, json=payload)
    return response.json()

# Use the function
bot_token = os.getenv('TELEGRAM_BOT_TOKEN')
chat_id = os.getenv('TELEGRAM_CHAT_ID')
message = f"Script SAC FL ist fertig - IP .81!"

result = send_telegram_message(bot_token, chat_id, message)
print(result)

{'ok': True, 'result': {'message_id': 18, 'from': {'id': 7071194232, 'is_bot': True, 'first_name': 'Reinforcement Learning', 'username': 'FederatedRL_Bot'}, 'chat': {'id': 5493937056, 'first_name': 'Jonas', 'last_name': 'Sievers', 'username': 'JonasSievers', 'type': 'private'}, 'date': 1714961617, 'text': 'Script SAC FL ist fertig - IP .81!'}}
