In [1]:
#Imports
import os
import sys

import pandas as pd
import tensorflow as tf

sys.path.insert(0, '..')
from utils.federatedAggregation import FederatedAggregation
from utils.reinforcementLearningHelper import *
from utils.federatedLearningHelper import *







# 1. Federated Learning

## 1.1 Normal Federated Learning for the Buildings 1 - 30 (1 Cluster, Only Emissions)

In [2]:
# Build the training, evaluation, and testing environments of the selected buildings
environments, observation_spec, action_spec = setup_energymanagement_environments2(
    num_buildings=30, ecoPriority=0, noise_scale=0
)

# Sanity check of the setup, using the training environment of building 1
reference_env = environments["train"]["building_1"]
upper_bound = round(reference_env.action_spec().maximum.item(), 3)
print(
    "Batch size:", reference_env.batch_size,
    f"/ State Space: {observation_spec.shape[0]} / Action Space: {action_spec.shape[0]}",
    f"/ Upper bound: {upper_bound}",
)

Batch size: 1 / State Space: 58 / Action Space: 1 / Upper bound: 2.3


In [3]:
# Cluster layout: one cluster that holds 30 consecutive buildings
first_building = 1
num_clusters = 1
clustered_buildings = {0: list(range(first_building, first_building + 30))}

# Federated-learning and agent-training hyperparameters
federated_rounds = 3
batch_size = 128
replay_buffer_capacity = 20000  # each dataset has fewer than 18,000 samples
initial_collect_steps = 2000
collect_steps_per_iteration = 30
num_iterations = 5000
eval_interval = num_iterations - 1

In [4]:
# FEDERATED LEARNING - Initialization (round 0)

def _federated_round_dir(cluster_id, fl_round):
    """Build the weight directory path of one cluster for one federated round."""
    relative_dir = f"models/zeroshot/ddpg/cluster_{cluster_id}/FLround{fl_round}_c{num_clusters}_wAgg"
    return os.path.join(os.getcwd(), relative_dir)


tf.compat.v1.reset_default_graph()
global_step = tf.compat.v1.train.get_or_create_global_step()

# Create one global model per cluster of similar buildings
for cluster in range(num_clusters):

    # 1. Build the global agent of this cluster
    global_ddpg_agent, global_eval_policy, global_collect_policy = initialize_ddpg_agent(
        observation_spec=observation_spec,
        action_spec=action_spec,
        global_step=global_step,
        environments=environments,
    )

    # 2. Store its initial weights as the round-0 model
    model_dir = _federated_round_dir(cluster, 0)
    os.makedirs(model_dir, exist_ok=True)
    save_ddpg_weights(global_ddpg_agent, model_dir)


# FEDERATED LEARNING - Model training over multiple rounds

storage_keys = (
    "actor_weights", "critic_weights", "target_actor_weights",
    "target_critic_weights", "performance_metrics",
)

for federated_round in range(federated_rounds):
    for cluster_number, buildings_in_cluster in clustered_buildings.items():

        print(f"Cluster {cluster_number}: Buildings {buildings_in_cluster} Federated round --- {federated_round+1} / {federated_rounds}")

        # Fresh per-round storage for the results of every building in this cluster
        local_storage = {key: [] for key in storage_keys}

        for building_index in buildings_in_cluster:

            # 0. Reset the default graph and obtain a new global step
            tf.compat.v1.reset_default_graph()
            global_step = tf.compat.v1.train.get_or_create_global_step()

            # 1. Initialize the local agent
            local_ddpg_agent, local_eval_policy, local_collect_policy = initialize_ddpg_agent(
                observation_spec=observation_spec,
                action_spec=action_spec,
                global_step=global_step,
                environments=environments,
            )

            # 2. Start from the weights stored for the current round
            #    (round 0 = initial weights, afterwards the previous aggregation result)
            model_dir = _federated_round_dir(cluster_number, federated_round)
            local_ddpg_agent = set_weights_to_ddpg_agent(local_ddpg_agent, model_dir)

            # 3. Prepare the training pipeline: iterator, replay buffer, driver
            train_env = environments["train"][f"building_{building_index}"]
            local_iterator, local_collect_driver, local_time_step, local_policy_state = setup_rl_training_pipeline(
                local_ddpg_agent, train_env, replay_buffer_capacity,
                local_collect_policy, initial_collect_steps, collect_steps_per_iteration, batch_size,
            )

            # 4. Train and evaluate the agent; its results are collected in local_storage
            local_ddpg_agent, local_storage = local_agent_training_and_evaluation(
                local_iterator, local_collect_driver, local_time_step, local_policy_state, global_step,
                local_ddpg_agent, local_eval_policy, local_storage, building_index, num_iterations,
                environments, agent_type="ddpg",
            )

        # Federated aggregation: every weight set is aggregated together with the
        # performance metrics stored during this round
        aggregate = FederatedAggregation.federated_weigthed_aggregation
        performance_metrics = local_storage["performance_metrics"]
        average_actor_weights = aggregate(local_storage["actor_weights"], performance_metrics)
        average_critic_weights = aggregate(local_storage["critic_weights"], performance_metrics)
        average_target_actor_weights = aggregate(local_storage["target_actor_weights"], performance_metrics)
        average_target_critic_weights = aggregate(local_storage["target_critic_weights"], performance_metrics)

        # Save the aggregated weights as the starting point of the next round (round + 1)
        model_dir = _federated_round_dir(cluster_number, federated_round + 1)
        os.makedirs(model_dir, exist_ok=True)
        save_federated_ddpg_weights(
            model_dir, average_actor_weights, average_critic_weights,
            average_target_actor_weights, average_target_critic_weights,
        )



Cluster 0: Buildings [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] Federated round --- 1 / 3
Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.
Return:  -4506.3545
Return:  -7215.9663
Return:  -2737.2515
Return:  -4582.319
Return:  -3476.9346
Return:  -9465.438
Return:  -4318.2275
Return:  -12527.788
Return:  -4316.008
Return:  -6420.2417
Return:  -4997.8604
Return:  -5088.2397
Return:  -3009.5503
Return:  -2363.719
Return:  -2510.7593
Return:  -8363.823
Return:  -6952.889
Return:  -7197.966
Return:  -1549.7811
Return:  -5148.7515
Return:  -10940.057
Return:  -3740.0415
Return:  -4786.6475
Return:  -1337.0984
Return:  -5084.8965
Return:  -7174.908
Return:  -5504.871
Return:  -3143.4395
Return:  -4083.3662
Return:  -2997.2341
Cluster 0: Buildings [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] Federated round --- 2 / 3


## 1.2 Test trained models on Buildings 31 to 60 (1 Cluster, Only Emissions)

In [5]:
# Build the training, evaluation, and testing environments for buildings 31 to 60
first_building = 31
environments, observation_spec, action_spec = setup_energymanagement_environments_for_zeroShot(
    num_buildings=30, ecoPriority=0, noise_scale=0, first_building=first_building
)

# Single cluster that holds the 30 consecutive test buildings
clustered_buildings = {0: list(range(first_building, first_building + 30))}

# Sanity check of the setup, using the training environment of the first test building
reference_env = environments["train"][f"building_{first_building}"]
upper_bound = round(reference_env.action_spec().maximum.item(), 3)
print(
    "Batch size:", reference_env.batch_size,
    f"/ State Space: {observation_spec.shape[0]} / Action Space: {action_spec.shape[0]}",
    f"/ Upper bound: {upper_bound}",
)

Batch size: 1 / State Space: 58 / Action Space: 1 / Upper bound: 2.3


In [6]:
# Name used for the wandb runs and the results CSV of the federated zero-shot test
csv_name = "NEW_FL_ZeroShot_DDPG_OnlyCosts"

In [7]:
# Zero-shot test of the federated model on the unseen buildings
best_federated_round = 3   # federated round whose aggregated weights are loaded
num_rounds = 3             # repetitions per building
num_test_iterations = 1

result_df = pd.DataFrame(columns=['Building', 'Total Profit'])

for cluster_number, buildings_in_cluster in clustered_buildings.items():
    for building_index in buildings_in_cluster:

        # The loop variable is deliberately NOT called `round`: that would shadow the
        # builtin round() used by the environment-check cells and leak an int named
        # `round` into every later cell of the kernel.
        for test_round in range(num_rounds):
            print("Cluster: ", cluster_number, " - Building: ", building_index, " - round: ", test_round)

            #0. Reset the default graph and obtain a new global step
            tf.compat.v1.reset_default_graph()
            global_step = tf.compat.v1.train.get_or_create_global_step()

            #1. Initialize the agent
            tf_ddpg_agent, eval_policy, collect_policy = initialize_ddpg_agent(
                observation_spec = observation_spec, action_spec = action_spec,
                global_step = global_step, environments = environments, first_building=first_building
                )

            #2. Load the aggregated weights stored for the selected federated round
            model_dir = os.path.join(os.getcwd(), f"models/zeroshot/ddpg/cluster_{cluster_number}/FLround{best_federated_round}_c{num_clusters}_wAgg")
            tf_ddpg_agent = set_weights_to_ddpg_agent(tf_ddpg_agent, model_dir)

            #3. Prepare the pipeline: iterator, replay buffer, driver
            iterator, collect_driver, time_step, policy_state = setup_rl_training_pipeline(
                tf_ddpg_agent, environments["train"][f"building_{building_index}"], replay_buffer_capacity,
                collect_policy, initial_collect_steps, collect_steps_per_iteration, batch_size
                )

            #4. Setup wandb logging
            artifact = initialize_wandb_logging(name=f"{csv_name}_Home{building_index}_rd{test_round}", num_iterations=num_test_iterations)

            #5. Run the agent for num_test_iterations and collect its results in result_df
            result_df, metrics = agent_training_and_evaluation(global_step, num_test_iterations, collect_driver,
                time_step, policy_state, iterator, tf_ddpg_agent, eval_policy, building_index, result_df, eval_interval, environments)

            #6. End and log wandb
            end_and_log_wandb(metrics, artifact)

Cluster:  0  - Building:  31  - round:  0


  result_df = pd.concat(


Building:  31  - Total Profit:  -703.5175707349844  - Total Emissions:  2585.206542997766
Cluster:  0  - Building:  31  - round:  1
Building:  31  - Total Profit:  -703.7137141797829  - Total Emissions:  2586.446770270959
Cluster:  0  - Building:  31  - round:  2
Building:  31  - Total Profit:  -703.5389260633275  - Total Emissions:  2585.651755430947
Cluster:  0  - Building:  32  - round:  0
Building:  32  - Total Profit:  -756.1475176826204  - Total Emissions:  2738.7707402963924
Cluster:  0  - Building:  32  - round:  1
Building:  32  - Total Profit:  -756.1783043330037  - Total Emissions:  2740.140963991585
Cluster:  0  - Building:  32  - round:  2
Building:  32  - Total Profit:  -755.8419495739294  - Total Emissions:  2736.5309457057247
Cluster:  0  - Building:  33  - round:  0
Building:  33  - Total Profit:  -1238.3894209471257  - Total Emissions:  4620.700394499147
Cluster:  0  - Building:  33  - round:  1
Building:  33  - Total Profit:  -1239.0300895749067  - Total Emissions:  

In [8]:
# Save results
# Tag every row with the experiment name and renumber the rows from 0.
# The index is discarded here and not written to the CSV, so it is not named;
# building a new frame (instead of inplace mutation) keeps the cell re-runnable.
result_df = result_df.assign(Setup=csv_name).reset_index(drop=True)

os.makedirs('results', exist_ok=True)
result_df.to_csv(f'results/{csv_name}.csv', index=False)

In [9]:
import pandas as pd

# Reload the stored federated zero-shot results and show the summed 'Total Profit'
csv_name = "NEW_FL_ZeroShot_DDPG_OnlyCosts"
results_path = f'results/{csv_name}.csv'

result_df = pd.read_csv(results_path)
result_df['Total Profit'].sum()

-92256.26110073196

In [10]:
# Mean over all buildings of the per-building standard deviation of 'Total Profit'
profit_by_building = result_df.groupby('Building')['Total Profit']
std_dev = profit_by_building.std().mean()
std_dev

0.388061494755657

# 2. Local Reinforcement Learning

In [11]:
# Build the training, evaluation, and testing environments of the selected buildings
environments, observation_spec, action_spec = setup_energymanagement_environments2(
    num_buildings=30,
    ecoPriority=0,
    noise_scale=0,
)

In [12]:
# Reproducibility: fix the TensorFlow random seed
SEED = 42
tf.random.set_seed(SEED)

# Agent-training hyperparameters
batch_size = 128
replay_buffer_capacity = 20000
initial_collect_steps = 2000
collect_steps_per_iteration = 30
num_iterations = 5000
eval_interval = num_iterations - 1

# Experiment size
num_rounds, num_buildings = 3, 30

In [13]:
# Train local model
local_storage = {"actor_weights": [], "critic_weights": [], "target_actor_weights": [], "target_critic_weights": [],"performance_metrics": []}

# Only the first building is trained locally (range(1) -> building 1)
for idx in range(1):

    building_index=idx+1
    # This cell has no round loop, so no round is printed. The previous message
    # printed whatever `round` happened to be bound to in the kernel (a stale loop
    # variable of an earlier cell, or the builtin function on a fresh kernel).
    print("Building: ", building_index)

    #0. Reset the default graph and obtain a new global step
    tf.compat.v1.reset_default_graph()
    global_step = tf.compat.v1.train.get_or_create_global_step()

    #1. Initialize agent
    tf_agent, eval_policy, collect_policy = initialize_ddpg_agent(observation_spec, action_spec, global_step, environments)

    #2. Prepare training pipeline: Setup iterator, replay buffer, driver
    iterator, collect_driver, time_step, policy_state = setup_rl_training_pipeline(
        tf_agent, environments["train"][f"building_{building_index}"], replay_buffer_capacity,collect_policy,
        initial_collect_steps, collect_steps_per_iteration, batch_size
        )

    #3. Train and evaluate the agent; its results are collected in local_storage
    local_ddpg_agent, local_storage = local_agent_training_and_evaluation(
                iterator, collect_driver, time_step, policy_state, global_step,
                tf_agent, eval_policy, local_storage, building_index, num_iterations, environments, agent_type="ddpg"
                )

    #4. Save the weights of the locally trained agent (no federated aggregation here)
    model_dir = os.path.join(os.getcwd(), f"models/zeroshot/local/ddpg/LL_building_{building_index}")
    os.makedirs(model_dir, exist_ok=True)
    save_ddpg_weights(local_ddpg_agent, model_dir)

Building:  1  - round:  2
Return:  -4457.896


In [14]:
# Local Learning test on buildings 31 to 60

first_building = 31
# Pass first_building explicitly, exactly as the federated zero-shot setup does,
# so the created environments do not depend on the helper's default value.
environments, observation_spec, action_spec = setup_energymanagement_environments_for_zeroShot(
    num_buildings=30, ecoPriority=0, noise_scale=0, first_building=first_building
)

# Name used for the wandb runs and the results CSV of the local zero-shot test
csv_name = "NEW_LL_ZeroShot_DDPG_OnlyCosts"


# Reproducibility: fix the TensorFlow random seed
SEED = 42
tf.random.set_seed(SEED)

# Agent hyperparameters
batch_size = 128
replay_buffer_capacity = 20000
initial_collect_steps = 2000
collect_steps_per_iteration = 30
num_iterations = 5000
eval_interval = num_iterations-1
num_test_iterations = 1

# Experiment size: repetitions per building and number of test buildings
num_rounds = 3
num_buildings = 30

In [15]:
# LOCAL LEARNING - zero-shot test of the locally trained model on the unseen buildings

result_df = pd.DataFrame(columns=['Building', 'Total Profit'])

for idx in range(num_buildings):
    # Does not depend on the repetition, so it is computed once per building
    building_index = first_building + idx

    # The loop variable is deliberately NOT called `round`: that would shadow the
    # builtin round() used by the environment-check cells and leak an int named
    # `round` into every later cell of the kernel.
    for test_round in range(num_rounds):
        print("Building: ", building_index, " - round: ", test_round)

        #0. Reset the default graph and obtain a new global step
        tf.compat.v1.reset_default_graph()
        global_step = tf.compat.v1.train.get_or_create_global_step()

        #1. Initialize agent
        tf_agent, eval_policy, collect_policy = initialize_ddpg_agent(observation_spec, action_spec, global_step, environments, first_building=first_building)

        #2. Load the weights of the model trained locally on building 1
        #   (the same model is used for every test building)
        model_dir = os.path.join(os.getcwd(), f"models/zeroshot/local/ddpg/LL_building_1")
        tf_agent = set_weights_to_ddpg_agent(tf_agent, model_dir)

        #3. Prepare the pipeline: Setup iterator, replay buffer, driver
        iterator, collect_driver, time_step, policy_state = setup_rl_training_pipeline(
            tf_agent, environments["train"][f"building_{building_index}"], replay_buffer_capacity,collect_policy,
            initial_collect_steps, collect_steps_per_iteration, batch_size
            )

        #4. Setup wandb logging
        artifact = initialize_wandb_logging(name=f"{csv_name}_Home{building_index}_rd{test_round}", num_iterations=num_test_iterations)

        #5. Run the agent for num_test_iterations and collect its results in result_df
        result_df, metrics = agent_training_and_evaluation(global_step, num_test_iterations, collect_driver,
                time_step, policy_state, iterator, tf_agent, eval_policy, building_index, result_df, eval_interval, environments)

        #6. End and log wandb
        end_and_log_wandb(metrics, artifact)


Building:  31  - round:  0


  result_df = pd.concat(


Building:  31  - Total Profit:  -771.4160306324779  - Total Emissions:  3044.9603599742736
Building:  31  - round:  1
Building:  31  - Total Profit:  -771.5680667236136  - Total Emissions:  3045.4502707348106
Building:  31  - round:  2
Building:  31  - Total Profit:  -771.5680667236136  - Total Emissions:  3045.4502707348106
Building:  32  - round:  0
Building:  32  - Total Profit:  -802.4156726845177  - Total Emissions:  3110.0882463182465
Building:  32  - round:  1
Building:  32  - Total Profit:  -805.0650576079321  - Total Emissions:  3127.5502925103183
Building:  32  - round:  2
Building:  32  - Total Profit:  -805.0650576079321  - Total Emissions:  3127.5502925103183
Building:  33  - round:  0
Building:  33  - Total Profit:  -1285.7206038283289  - Total Emissions:  4826.7957296028635
Building:  33  - round:  1
Building:  33  - Total Profit:  -1306.858626236269  - Total Emissions:  5049.474191105027
Building:  33  - round:  2
Building:  33  - Total Profit:  -1306.858626236269  - To

In [16]:
# Save results
# Tag every row with the experiment name and renumber the rows from 0.
# The index is discarded here and not written to the CSV, so it is not named;
# building a new frame (instead of inplace mutation) keeps the cell re-runnable.
result_df = result_df.assign(Setup=csv_name).reset_index(drop=True)

os.makedirs('results', exist_ok=True)
result_df.to_csv(f'results/{csv_name}.csv', index=False)

In [17]:
# Reload the stored local zero-shot results and show the summed 'Total Profit'
csv_name = "NEW_LL_ZeroShot_DDPG_OnlyCosts"
results_path = f'results/{csv_name}.csv'

result_df = pd.read_csv(results_path)
result_df['Total Profit'].sum()

-97229.4788109568

In [18]:
# Mean over all buildings of the per-building standard deviation of 'Total Profit'
profit_by_building = result_df.groupby('Building')['Total Profit']
std_dev = profit_by_building.std().mean()
std_dev

4.473786919759886

In [19]:
import requests

def send_telegram_message(bot_token, chat_id, message, timeout=10):
    """Send a message to a Telegram chat via the Bot API.

    Args:
        bot_token: Token of the Telegram bot; it is inserted into the request URL.
        chat_id: Identifier of the chat that receives the message.
        message: Text to send. It is posted with ``parse_mode`` set to "Markdown".
        timeout: Seconds to wait for the HTTP request. ``requests`` applies no
            timeout by default, so without it a stalled connection would block
            the notebook indefinitely.

    Returns:
        The JSON-decoded body of the HTTP response.

    Raises:
        requests.exceptions.Timeout: If the request does not complete within
            ``timeout`` seconds.
    """
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    payload = {
        "chat_id": chat_id,
        "text": message,
        "parse_mode": "Markdown"
    }
    response = requests.post(url, json=payload, timeout=timeout)
    # The JSON body is returned as-is, including for error replies, so the caller
    # can inspect it.
    return response.json()

# Notify via Telegram that the notebook run has finished.
# The credentials are read from the environment so they are never stored in the notebook.
bot_token = os.getenv('TELEGRAM_BOT_TOKEN')
chat_id = os.getenv('TELEGRAM_CHAT_ID')
message = "Script PC Home ist fertig!"

if bot_token and chat_id:
    result = send_telegram_message(bot_token, chat_id, message)
else:
    # os.getenv returns None for unset variables; sending anyway would post to
    # ".../botNone/sendMessage". Skip the request and report why instead.
    result = {"ok": False, "description": "TELEGRAM_BOT_TOKEN / TELEGRAM_CHAT_ID not set; notification skipped"}
print(result)

{'ok': True, 'result': {'message_id': 255, 'from': {'id': 7071194232, 'is_bot': True, 'first_name': 'Reinforcement Learning', 'username': 'FederatedRL_Bot'}, 'chat': {'id': 5493937056, 'first_name': 'Jonas', 'last_name': 'Sievers', 'username': 'JonasSievers', 'type': 'private'}, 'date': 1722341110, 'text': 'Script PC Home ist fertig!'}}
