In [1]:
# Imports
import os
import logging
import pandas as pd
import wandb
import tensorflow as tf

logging.getLogger("wandb").setLevel(logging.ERROR)
os.environ['WANDB_SILENT'] = 'true'
os.environ['WANDB_CONSOLE'] = 'off'

import sys
sys.path.insert(0, '..')
from utils.reinforcementLearningHelper import *







In [2]:
#Setup Environments of selected buildings for training, evaluation, and testing

environments, observation_spec, action_spec  = setup_energymanagement_environments(num_buildings=30)

#Check environment setup
print(
    "Batch size:", environments["train"][f"building_1"].batch_size, 
    "/ State Space: {} / Action Space: {}".format(observation_spec.shape[0], action_spec.shape[0]),
    "/ Upper bound: {}".format(round(environments["train"][f"building_1"].action_spec().maximum.item(), 3)),
)

Batch size: 1 / State Space: 22 / Action Space: 1 / Upper bound: 2.3


In [3]:
# Setup Agent networks
SEED = 42
tf.random.set_seed(SEED)

batch_size = 128
replay_buffer_capacity = 20000 
initial_collect_steps = 2000
collect_steps_per_iteration = 30 
num_iterations = 10000 
eval_interval = num_iterations - 10

num_rounds = 3
num_buildings = 30

In [4]:
result_df = pd.DataFrame(columns=['Building', 'Total Profit'])

for idx in range(num_buildings):
    for round in range(num_rounds):
        building_index=idx+1
        print("Building: ", building_index, " - round: ", round)

        #0. Reset global step
        #tf.compat.v1.reset_default_graph()
        global_step = tf.Variable(0, trainable=False, name='global_step')
        
        #1. Initalize agent
        tf_agent, eval_policy, collect_policy = initialize_ppo_agent(observation_spec, action_spec, global_step, environments)

        #2. Prepare training pipeline: Setup iterator, replay buffer, driver
        iterator, collect_driver, time_step, policy_state = setup_rl_training_pipeline(
            tf_agent, environments["train"][f"building_{building_index}"], replay_buffer_capacity,collect_policy, 
            initial_collect_steps, collect_steps_per_iteration, batch_size
            )
        
        #3. Setup wandb logging
        artifact = initialize_wandb_logging(name=f"Exp_PPO_LL_Home{building_index}_rd{round}", num_iterations=num_iterations)

        result_df, metrics = agent_training_and_evaluation(global_step, num_iterations, collect_driver, 
                time_step, policy_state, iterator, tf_agent, eval_policy, building_index, result_df, eval_interval, environments)

        #5. End and log wandb
        end_and_log_wandb(metrics, artifact)
               

Building:  1  - round:  0

Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.


  result_df = pd.concat([result_df, pd.DataFrame({'Building': [building_index], 'Total Profit': [wandb.summary["Final Profit"]]})], ignore_index=True)


Building:  1  - Total Profit:  -1404.3812679346863
Building:  1  - round:  1
Building:  1  - Total Profit:  -1224.6008991110784
Building:  1  - round:  2
Building:  1  - Total Profit:  -1280.9605834782908
Building:  2  - round:  0
Building:  2  - Total Profit:  -1193.2074023605803
Building:  2  - round:  1
Building:  2  - Total Profit:  -1312.9613326809642
Building:  2  - round:  2
Building:  2  - Total Profit:  -1169.2475732870796
Building:  3  - round:  0
Building:  3  - Total Profit:  -358.04145015015433
Building:  3  - round:  1
Building:  3  - Total Profit:  -358.107273316499
Building:  3  - round:  2
Building:  3  - Total Profit:  -354.93104713999634
Building:  4  - round:  0
Building:  4  - Total Profit:  -985.9652954511841
Building:  4  - round:  1
Building:  4  - Total Profit:  -982.589453104502
Building:  4  - round:  2
Building:  4  - Total Profit:  -942.5523124035392
Building:  5  - round:  0
Building:  5  - Total Profit:  -1107.830100090001
Building:  5  - round:  1
Buildi

In [5]:
# Save results
result_df.rename(columns={'Total Profit': 'Profit'}, inplace=True)
result_df['Setup'] = 'PPO_LL'
result_df.index.name = 'Building_nr'
result_df.reset_index(inplace=True, drop=True)
os.makedirs('results', exist_ok=True)
result_df.to_csv(f'results/PPO_LL_results_setting_A.csv', index=False)
print("Final reuslt: ", result_df["Profit"].sum())

Final reuslt:  -109941.40236275663


In [6]:
import requests

def send_telegram_message(bot_token, chat_id, message):
    """Send a message to a Telegram chat via the Bot API."""
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    payload = {
        "chat_id": chat_id,
        "text": message,
        "parse_mode": "Markdown"
    }
    response = requests.post(url, json=payload)
    return response.json()

# Use the function
bot_token = os.getenv('TELEGRAM_BOT_TOKEN')
chat_id = os.getenv('TELEGRAM_CHAT_ID')
message = f"Script PPO LL ist fertig!"

result = send_telegram_message(bot_token, chat_id, message)
print(result)

{'ok': True, 'result': {'message_id': 10, 'from': {'id': 7071194232, 'is_bot': True, 'first_name': 'Reinforcement Learning', 'username': 'FederatedRL_Bot'}, 'chat': {'id': 5493937056, 'first_name': 'Jonas', 'last_name': 'Sievers', 'username': 'JonasSievers', 'type': 'private'}, 'date': 1714603623, 'text': 'Script PPO LL ist fertig!'}}
