In [1]:
# Imports
import time
import os
import logging
import pandas as pd
import wandb
import tensorflow as tf

# Quiet Weights & Biases: set its console-related environment flags and
# raise the "wandb" logger threshold so only errors are emitted.
os.environ.update({'WANDB_SILENT': 'true', 'WANDB_CONSOLE': 'off'})
logging.getLogger("wandb").setLevel(logging.ERROR)

import sys
sys.path.insert(0, '..')
from utils.reinforcementLearningHelper import *







In [2]:
# Build the per-building environments (training, evaluation, testing) together
# with the observation and action specs used by the later cells.
environments, observation_spec, action_spec = setup_energymanagement_environments2(
    num_buildings=30,
    ecoPriority=0,
    noise_scale=0,
)

# Sanity check on the first training building: batch size, state/action
# dimensions and the (rounded) upper bound of the action range.
first_train_env = environments["train"]["building_1"]
print(
    "Batch size:", first_train_env.batch_size,
    "/ State Space: {} / Action Space: {}".format(observation_spec.shape[0], action_spec.shape[0]),
    "/ Upper bound: {}".format(round(first_train_env.action_spec().maximum.item(), 3)),
)

Batch size: 1 / State Space: 58 / Action Space: 1 / Upper bound: 2.3


In [3]:
# Training configuration: random seed plus the hyperparameters consumed by the
# training loop cell further down.
SEED = 42
# Seeds TensorFlow's global random generator only; Python's `random` and NumPy
# are not seeded in this cell.
tf.random.set_seed(SEED)

# The next four values are passed to setup_rl_training_pipeline.
batch_size = 128
replay_buffer_capacity = 20000 
initial_collect_steps = 2000
collect_steps_per_iteration = 30  
# Passed to initialize_wandb_logging and agent_training_and_evaluation.
num_iterations = 5000  
# Passed to agent_training_and_evaluation; presumably this limits evaluation to
# the end of training -- TODO confirm against the helper.
eval_interval = num_iterations - 1

# Loop bounds of the training cell: repeated runs per building, and number of buildings.
num_rounds = 3
# NOTE(review): the environment setup cell hard-codes num_buildings=30 separately;
# keep the two values in sync.
num_buildings = 30

In [4]:
# Experiment tag: prefixes the wandb run names, fills the 'Setup' column of the
# results table, and names the results CSV file.
csv_name = "NEW_LL_PPO_OnlyCosts_v6"

In [5]:
# Train and evaluate one freshly initialised PPO agent per (building, round) pair.
# result_df is passed into agent_training_and_evaluation on every run and rebound
# to the frame that call returns.
result_df = pd.DataFrame(columns=['Building', 'Total Profit'])

start_time = time.time()

# Building indices are 1-based: the environments are looked up as "building_<index>".
for building_index in range(1, num_buildings + 1):
    # Named round_idx rather than `round`: rebinding the builtin round() at
    # notebook scope breaks the environment-check cell (it calls round())
    # whenever that cell is re-run after this one.
    for round_idx in range(num_rounds):
        print("Building: ", building_index, " - round: ", round_idx)

        #0. Reset global step
        tf.compat.v1.reset_default_graph()
        global_step = tf.compat.v1.train.get_or_create_global_step()

        #1. Initialize agent
        tf_agent, eval_policy, collect_policy = initialize_ppo_agent(
            observation_spec, action_spec, global_step, environments
        )

        #2. Prepare training pipeline: Setup iterator, replay buffer, driver
        iterator, collect_driver, time_step, policy_state = setup_rl_training_pipeline(
            tf_agent, environments["train"][f"building_{building_index}"], replay_buffer_capacity, collect_policy,
            initial_collect_steps, collect_steps_per_iteration, batch_size
        )

        #3. Setup wandb logging
        artifact = initialize_wandb_logging(
            name=f"{csv_name}_Home{building_index}_rd{round_idx}", num_iterations=num_iterations
        )

        #4. Train and evaluate; returns the updated results frame and the metrics logged in step 5
        result_df, metrics = agent_training_and_evaluation(
            global_step, num_iterations, collect_driver, time_step, policy_state,
            iterator, tf_agent, eval_policy, building_index, result_df, eval_interval, environments
        )

        #5. End and log wandb
        end_and_log_wandb(metrics, artifact)

end_time = time.time()
# Elapsed wall-clock seconds for the whole sweep (end minus start; the reverse
# order yields a negative duration).
time_taken = end_time - start_time

Building:  1  - round:  0

Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.


  result_df = pd.concat(


Building:  1  - Total Profit:  -1714.5249972017427  - Total Emissions:  9012.980633488682
Building:  1  - round:  1
Building:  1  - Total Profit:  -1265.6391123403116  - Total Emissions:  6426.408459600721
Building:  1  - round:  2
Building:  1  - Total Profit:  -1266.1511366278603  - Total Emissions:  6431.124608590856
Building:  2  - round:  0
Building:  2  - Total Profit:  -1165.7040444699956  - Total Emissions:  4486.300561874969
Building:  2  - round:  1
Building:  2  - Total Profit:  -1165.7040444699956  - Total Emissions:  4486.300561874969
Building:  2  - round:  2
Building:  2  - Total Profit:  -1165.7040444699956  - Total Emissions:  4486.300561874969
Building:  3  - round:  0
Building:  3  - Total Profit:  -354.8583669999963  - Total Emissions:  1719.2886666884938
Building:  3  - round:  1
Building:  3  - Total Profit:  -354.8583669999963  - Total Emissions:  1719.2886666884938
Building:  3  - round:  2
Building:  3  - Total Profit:  -354.8583669999963  - Total Emissions:  1

In [6]:
# Save results
# Tag every row with the experiment name and the measured time_taken value,
# renumber the rows from 0, and write the table to results/<csv_name>.csv.
# No index name is set: the index is replaced by reset_index(drop=True) and is
# not written to the CSV (index=False), so naming it beforehand had no effect.
# NOTE(review): if a 'Building_nr' column was actually wanted in the CSV, that
# needs an explicit column -- confirm the intended schema.
result_df = result_df.assign(Setup=csv_name, Time=time_taken).reset_index(drop=True)

os.makedirs('results', exist_ok=True)
result_df.to_csv(f'results/{csv_name}.csv', index=False)

print("Final result: ", result_df["Total Profit"].sum())

Final result:  -108458.82136410743


In [7]:
import requests

def send_telegram_message(bot_token, chat_id, message, timeout=10, parse_mode="Markdown"):
    """Send a message to a Telegram chat via the Bot API.

    Args:
        bot_token: Bot token, interpolated into the request URL.
        chat_id: Identifier of the target chat.
        message: Text of the message.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout, `requests` can block indefinitely on an
            unresponsive connection, which would hang the notebook kernel.
        parse_mode: Value sent as "parse_mode"; pass None to omit the field
            and send the text without formatting.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        requests.exceptions.Timeout: if no response arrives within `timeout`.
    """
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    payload = {
        "chat_id": chat_id,
        "text": message,
    }
    if parse_mode is not None:
        payload["parse_mode"] = parse_mode
    response = requests.post(url, json=payload, timeout=timeout)
    return response.json()

# Use the function
# Notify the Telegram chat that this run has finished. Credentials are read
# from the environment (None when a variable is unset).
# NOTE: the printed API response contains chat and user metadata -- clear this
# cell's output before sharing the notebook.
bot_token = os.environ.get('TELEGRAM_BOT_TOKEN')
chat_id = os.environ.get('TELEGRAM_CHAT_ID')
message = "Script .81 ist fertig!"

result = send_telegram_message(bot_token, chat_id, message)
print(result)

{'ok': True, 'result': {'message_id': 277, 'from': {'id': 7071194232, 'is_bot': True, 'first_name': 'Reinforcement Learning', 'username': 'FederatedRL_Bot'}, 'chat': {'id': 5493937056, 'first_name': 'Jonas', 'last_name': 'Sievers', 'username': 'JonasSievers', 'type': 'private'}, 'date': 1722598479, 'text': 'Script .81 ist fertig!'}}


In [8]:
# Sum of 'Total Profit' over all rows of result_df -- the same quantity the
# save cell prints as "Final result".
result_df['Total Profit'].sum()

-108458.82136410743

In [9]:
# Standard deviation of 'Total Emissions' within each 'Building' group (pandas
# default, i.e. sample std with ddof=1), then averaged over the buildings.
# Presumably each group holds one row per training round, and the
# 'Total Emissions' column is added by agent_training_and_evaluation -- TODO confirm.
std_dev = result_df.groupby('Building')['Total Emissions'].std().mean()
std_dev

136.55890651917312