In [6]:
# Imports
import os
import logging
import pandas as pd

# Quiet the Python-side loggers of wandb and TensorFlow: only ERROR and above
# are emitted, which keeps the training cell's output readable.
logging.getLogger("wandb").setLevel(logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# Environment switches for wandb console output and TensorFlow's native (C++) logs.
# They are set here, before `tensorflow` and `wandb` are imported further down.
# NOTE(review): presumably these are read at import/initialisation time - confirm
# before reordering this cell.
os.environ['WANDB_SILENT'] = 'true'
os.environ['WANDB_CONSOLE'] = 'off'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
from tf_agents.agents.ddpg import ddpg_agent
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import tf_py_environment, py_environment, batched_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.utils import common
import matplotlib.pyplot as plt
import wandb

import sys
sys.path.insert(0, '..')
from environments.EnergyManagementEnv import EnergyManagementEnv
from utils.agentNetworks import ActorNetwork, CriticNetwork, CustomLayers
import utils.dataloader as DL

In [7]:
# Load data and setup environments
#
# Builds, for every building, one DataFrame and one TF-Agents environment per
# split. Results are stored as dataset[split]["building_<n>"] and
# environments[split]["building_<n>"] with split in {"train", "eval", "test"}.

num_buildings = 30

# Positional row ranges (start, stop) of each split inside the full time series.
split_bounds = {
    "train": (0, 17520),
    "eval": (17520, 35088),
    "test": (35088, 52608),
}

# Index by date and replace missing readings with 0. Written as a single chain
# (no inplace mutation) so the cell gives the same result on every re-run.
energy_data = (
    pd.read_csv("../../data/3final_data/Final_Energy_dataset.csv", header=0)
    .set_index('Date')
    .fillna(0)
)

dataset = {split: {} for split in split_bounds}
environments = {split: {} for split in split_bounds}
for idx in range(num_buildings):
    building = f"building_{idx+1}"
    # Per-building load and PV columns plus the shared price and fuel-mix columns.
    user_data = energy_data[[f'load_{idx+1}', f'pv_{idx+1}', 'price', 'fuelmix']]

    for split, (start, stop) in split_bounds.items():
        # Re-base the index to 0..n-1. Assigning a RangeIndex of the expected
        # length also fails loudly if the file holds fewer rows than the split needs.
        split_data = user_data.iloc[start:stop].set_index(pd.RangeIndex(0, stop - start))
        dataset[split][building] = split_data
        environments[split][building] = tf_py_environment.TFPyEnvironment(
            EnergyManagementEnv(init_charge=0.0, data=split_data)
        )

# Sanity check on the first training environment.
first_env = environments["train"]["building_1"]
print("Batch size: ", first_env.batch_size)
# Original note on the state vector: "SoE, price, price forecast 1-6".
# NOTE(review): that lists more entries than the state size printed here - confirm.
print("State Space: {}, Action Space: {}".format(first_env.observation_spec().shape[0], first_env.action_spec().shape[0]))
print("Upper bound: {}".format(round(first_env.action_spec().maximum.item(), 3)))
dataset["test"]["building_1"].head(1)

Batch size:  1
State Space: 6, Action Space: 1
Upper bound: 2.3


Unnamed: 0,load_1,pv_1,price,fuelmix
0,1.149,0.0,0.05704,0.530991


In [3]:
# Setup Agent networks
#
# Trains one independent DDPG agent per building on its training environment,
# logs the training loss to Weights & Biases, then evaluates the greedy policy
# on the building's test environment and logs the average return.
batch_size = 1
replay_buffer_capacity = 100000
initial_collect_steps = 1000
collect_steps_per_iteration = 1000 #2000
num_iterations = 50 #1500
# Single source of truth for the optimizers and the wandb run config.
actor_learning_rate = 1e-4 # original note: exponents -2 to -4
critic_learning_rate = 1e-3 # original note: exponents -1 to -3

# Authentication does not depend on the building, so do it once up front.
wandb.login()

for idx in range(num_buildings):
    building = f"building_{idx+1}"
    train_env = environments["train"][building]
    test_env = environments["test"][building]
    observation_spec = train_env.observation_spec()
    action_spec = train_env.action_spec()

    # Fresh step counter per building; it is handed to the agent as its
    # train_step_counter and also drives the training loop below.
    global_step = tf.compat.v1.train.create_global_step()

    try:
        actor_net = ActorNetwork(
            observation_spec=observation_spec,
            action_spec=action_spec,
            custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
        )

        critic_net = CriticNetwork(
            observation_spec=observation_spec,
            action_spec=action_spec,
            custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
        )

        target_actor_network = ActorNetwork(
            observation_spec=observation_spec,
            action_spec=action_spec,
            custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
        )

        target_critic_network = CriticNetwork(
            observation_spec=observation_spec,
            action_spec=action_spec,
            custom_layers=[CustomLayers.get_dense_layers(layers=1, units=4)],
        )

        tf_agent = ddpg_agent.DdpgAgent(
            train_env.time_step_spec(),
            action_spec,
            actor_network=actor_net,
            critic_network=critic_net,
            actor_optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=actor_learning_rate),
            critic_optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=critic_learning_rate),
            ou_stddev=0.2, # also tried 0.3
            ou_damping=0.15,
            target_actor_network=target_actor_network,
            target_critic_network=target_critic_network,
            target_update_tau=0.05, # also tried 0.005, 0.01
            target_update_period=10, # also tried 5, 20, 50
            dqda_clipping=1,
            td_errors_loss_fn=tf.compat.v1.losses.huber_loss, # alternative: tf.keras.losses.MeanSquaredError()
            gamma=0.99, # also tried 0.9
            reward_scale_factor=10, # also tried 1.0
            train_step_counter=global_step,
        )

        tf_agent.initialize()
        collect_policy = tf_agent.collect_policy

        replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
            data_spec=tf_agent.collect_data_spec,
            batch_size=train_env.batch_size,
            max_length=replay_buffer_capacity,
        )

        # Both drivers step the training environment with the exploration policy
        # and append every transition to the replay buffer; they differ only in
        # how many steps one run() call collects.
        initial_collect_driver = dynamic_step_driver.DynamicStepDriver(
            train_env,
            collect_policy,
            observers=[replay_buffer.add_batch],
            num_steps=initial_collect_steps,
        )

        collect_driver = dynamic_step_driver.DynamicStepDriver(
            train_env,
            collect_policy,
            observers=[replay_buffer.add_batch],
            num_steps=collect_steps_per_iteration,
        )

        wandb.init(
            project="DDPG_battery_testing",
            job_type="train_eval_test",
            name=f"Exp_building{idx+1}",
            config={
                "train_steps": num_iterations,
                "batch_size": batch_size,
                "actor_learning_rate": actor_learning_rate,
                "critic_learning_rate": critic_learning_rate}
        )
        # NOTE: checkpointing is currently disabled, so this artifact is logged
        # without any files. To re-enable it, create a common.Checkpointer with
        # ckpt_dir='checkpoints/ddpg/' (agent, policy, replay_buffer, global_step),
        # call initialize_or_restore(), and artifact.add_dir() that directory
        # before logging the artifact.
        artifact = wandb.Artifact(name='save', type="checkpoint")

        test_metrics = [tf_metrics.AverageReturnMetric(batch_size=batch_size)]

        # For better performance
        initial_collect_driver.run = common.function(initial_collect_driver.run)
        collect_driver.run = common.function(collect_driver.run)
        tf_agent.train = common.function(tf_agent.train)

        # Collect initial replay data
        initial_collect_driver.run()
        time_step = train_env.reset()
        policy_state = collect_policy.get_initial_state(train_env.batch_size)

        # Pipeline which will feed data to the agent. Named `replay_dataset` so it
        # does not overwrite the `dataset` dictionary built by the data-loading cell.
        replay_dataset = replay_buffer.as_dataset(num_parallel_calls=3, sample_batch_size=batch_size, num_steps=2).prefetch(3)
        iterator = iter(replay_dataset)

        # Train: each iteration collects fresh experience, then performs one
        # gradient step, which advances global_step.
        print(f"Start training building {idx+1}")
        while global_step.numpy() < num_iterations:
            print(global_step.numpy(), "/ ", num_iterations)
            time_step, policy_state = collect_driver.run(
                time_step=time_step,
                policy_state=policy_state,
            )
            experience, _ = next(iterator)
            train_loss = tf_agent.train(experience)

            # Log the loss on every second training step.
            if global_step.numpy() % 2 == 0:
                wandb.log({"Loss": train_loss.loss})

        print("Start testing ...")
        metrics = metric_utils.eager_compute(
            test_metrics,
            test_env,
            tf_agent.policy,
            num_episodes=batch_size)
        # Named `test_log` so the `logging` module imported at the top of the
        # notebook is not shadowed for later cells.
        test_log = {"AverageReturn": metrics['AverageReturn'].numpy()}
        wandb.log(test_log)
        wandb.log_artifact(artifact)
    finally:
        # Always close the wandb run and clear the default graph, even when a
        # building fails or is interrupted. create_global_step() is documented to
        # raise if a global step already exists, so skipping this cleanup would
        # break the next building and any re-run of this cell.
        wandb.finish()
        tf.compat.v1.reset_default_graph()

Instructions for updating:
Use `tf.data.Dataset.counter(...)` instead.
Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.
Start training building 1
0 /  50
1 /  50
2 /  50
3 /  50
4 /  50
5 /  50
6 /  50
7 /  50
8 /  50
9 /  50
10 /  50
11 /  50
12 /  50
13 /  50
14 /  50
15 /  50
16 /  50
17 /  50
18 /  50
19 /  50
20 /  50
21 /  50
22 /  50
23 /  50
24 /  50
25 /  50
26 /  50
27 /  50
28 /  50
29 /  50
30 /  50
31 /  50
32 /  50
33 /  50
34 /  50
35 /  50
36 /  50
37 /  50
38 /  50
39 /  50
40 /  50
41 /  50
42 /  50
43 /  50
44 /  50
45 /  50
46 /  50
47 /  50
48 /  50
49 /  50
Start testing ...
Start training building 2
0 /  50
1 /  50
2 /  50
3 /  50
4 /  50
5 /  50
6 /  50
7 /  50
8 /  50
9 /  50
10 /  50
11 /  50
12 /  50
13 /  50
14 /  50
15 /  50
16 /  50
17 /  50
18 /  50
19 /  50
20 /  50
21 /  50
22 /  50
23 /  50
24 /  50
25 /  50
26 /  50
27 /  50
28 /  50
29 /  50
30 /  50
31 /  50
32 /  50
33 /  50
34 /  50
35 /  50
36 /  50
37 /  