In [1]:
# Imports
import time
import os
import pandas as pd
import tensorflow as tf

import sys
sys.path.insert(0, '..')
from utils.reinforcementLearningHelper import *







In [2]:
#Setup Environments of selected buildings for training, evaluation, and testing

environments, observation_spec, action_spec  = setup_energymanagement_environments2(num_buildings=30, ecoPriority=1, noise_scale=0)

#Check environment setup
print(
    "Batch size:", environments["train"][f"building_1"].batch_size, 
    "/ State Space: {} / Action Space: {}".format(observation_spec.shape[0], action_spec.shape[0]),
    "/ Upper bound: {}".format(round(environments["train"][f"building_1"].action_spec().maximum.item(), 3)),
)

Batch size: 1 / State Space: 148 / Action Space: 1 / Upper bound: 2.3


In [3]:
# Setup Agent networks
SEED = 42
tf.random.set_seed(SEED)

batch_size = 128
replay_buffer_capacity = 20000 
initial_collect_steps = 2000
collect_steps_per_iteration = 30 
num_iterations = 5000 
eval_interval = num_iterations-1

num_rounds = 3
num_buildings = 30

In [4]:
csv_name = "NEW_LL_TD3_OnlyEmissions_v2"

In [5]:
# LOCAL LEARNING

result_df = pd.DataFrame(columns=['Building', 'Total Profit'])

start_time = time.time()

for idx in range(num_buildings):
    for round in range(num_rounds):
        building_index=idx+1
        print("Building: ", building_index, " - round: ", round)
        #0. Reset global step
        tf.compat.v1.reset_default_graph()
        global_step = tf.compat.v1.train.get_or_create_global_step()
        
        #1. Initalize agent
        tf_agent, eval_policy, collect_policy = initialize_td3_agent(observation_spec, action_spec, global_step, environments)

        #2. Prepare training pipeline: Setup iterator, replay buffer, driver
        iterator, collect_driver, time_step, policy_state = setup_rl_training_pipeline(
            tf_agent, environments["train"][f"building_{building_index}"], replay_buffer_capacity,collect_policy, 
            initial_collect_steps, collect_steps_per_iteration, batch_size
            )

        #3. Setup wandb logging
        artifact = initialize_wandb_logging(name=f"{csv_name}_Home{building_index}_rd{round}", num_iterations=num_iterations)

        #4. Train, evaluate agent and store weights
        result_df, metrics = agent_training_and_evaluation(global_step, num_iterations, collect_driver, 
                time_step, policy_state, iterator, tf_agent, eval_policy, building_index, result_df, eval_interval, environments)
            
        #6. End and log wandb
        end_and_log_wandb(metrics, artifact)

end_time = time.time()
time_taken = end_time - start_time

Building:  1  - round:  0

Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.


  result_df = pd.concat(


Building:  1  - Total Profit:  -834.9018880246663  - Total Emissions:  3508.9990993358488
Building:  1  - round:  1
Building:  1  - Total Profit:  -840.2593058538055  - Total Emissions:  3631.441161703713
Building:  1  - round:  2
Building:  1  - Total Profit:  -860.1634951564255  - Total Emissions:  3751.524440377319
Building:  2  - round:  0
Building:  2  - Total Profit:  -1096.3584421209496  - Total Emissions:  3771.2460698996656
Building:  2  - round:  1
Building:  2  - Total Profit:  -1105.1402701920922  - Total Emissions:  3888.8070221550665
Building:  2  - round:  2
Building:  2  - Total Profit:  -1121.2049812409352  - Total Emissions:  3751.8875137997597
Building:  3  - round:  0
Building:  3  - Total Profit:  -261.9371245694267  - Total Emissions:  990.1493207380319
Building:  3  - round:  1
Building:  3  - Total Profit:  -274.5918280842463  - Total Emissions:  1087.8216487589737
Building:  3  - round:  2
Building:  3  - Total Profit:  -261.9503431605074  - Total Emissions:  9

In [6]:
# Save results
result_df.rename(columns={'Total Profit': 'Profit'}, inplace=True)
result_df['Setup'] = 'TD3_LL'
result_df['Time'] = time_taken
result_df.index.name = 'Building_nr'
result_df.reset_index(inplace=True, drop=True)
os.makedirs('results', exist_ok=True)
result_df.to_csv(f'results/TD3_LL_results_Time.csv', index=False)
print("Final reuslt: ", result_df["Profit"].sum(), "Time: ", result_df["Time"])

Final reuslt:  -97121.08641211338 Time:  0     14958.801133
1     14958.801133
2     14958.801133
3     14958.801133
4     14958.801133
          ...     
85    14958.801133
86    14958.801133
87    14958.801133
88    14958.801133
89    14958.801133
Name: Time, Length: 90, dtype: float64


In [7]:
import requests

def send_telegram_message(bot_token, chat_id, message):
    """Send a message to a Telegram chat via the Bot API."""
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    payload = {
        "chat_id": chat_id,
        "text": message,
        "parse_mode": "Markdown"
    }
    response = requests.post(url, json=payload)
    return response.json()

# Use the function
bot_token = os.getenv('TELEGRAM_BOT_TOKEN')
chat_id = os.getenv('TELEGRAM_CHAT_ID')
message = f"Script TD3 LL Time ist fertig at .81!"

result = send_telegram_message(bot_token, chat_id, message)
print(result)

{'ok': True, 'result': {'message_id': 223, 'from': {'id': 7071194232, 'is_bot': True, 'first_name': 'Reinforcement Learning', 'username': 'FederatedRL_Bot'}, 'chat': {'id': 5493937056, 'first_name': 'Jonas', 'last_name': 'Sievers', 'username': 'JonasSievers', 'type': 'private'}, 'date': 1722016957, 'text': 'Script TD3 LL Time ist fertig at .81!'}}


In [8]:
result_df['Total Emissions'].sum()

340313.6597889771

In [9]:
std_dev = result_df.groupby('Building')['Total Emissions'].std().mean()
std_dev

135.2783554085116