In [1]:
# Imports
import matplotlib.pyplot as plt
import numpy as np

import ray
from ray.rllib.algorithms.ppo import PPOConfig

from src.parsers import HMParser
from src.resources import Aggregator, Generator, Load, Storage, Vehicle
from src.algorithms.rl import EnergyCommunitySequentialV0



In [2]:
# Data parsing

# NOTE(review): absolute, machine-specific path -- point this at the local copy
# of EC_V4.xlsx before running on another machine.
EC_DATA_FILE = '/Users/ecgomes/DataspellProjects/pyecom/data/EC_V4.xlsx'
# Passed to the parser as `ec_id`; presumably selects which energy community
# of the workbook is read -- confirm against HMParser.
EC_ID = 1

# Later cells read data.generator, data.load, data.storage, data.vehicle and
# data.peers after parse() has run.
data = HMParser(file_path=EC_DATA_FILE, ec_id=EC_ID)
data.parse()

In [3]:
# Define resources

# Every resource created in this cell is collected in this list, which is
# later handed to the environment constructor.
resources = []

# Add the generators: one Generator per row of the forecast array
generator_data = data.generator
for i in range(generator_data['p_forecast'].shape[0]):
    generator_forecast = generator_data['p_forecast'][i]
    generator = Generator(
        name=f'generator_{i + 1:02d}',
        # Start at zero; bounds are zero .. forecast for this generator.
        value=np.zeros(generator_forecast.shape),
        lower_bound=np.zeros(generator_forecast.shape),
        upper_bound=generator_forecast,
        cost=generator_data['cost_parameter_b'][i],
        cost_nde=generator_data['cost_nde'][i],
        is_renewable=generator_data['type_generator'][i],
    )
    resources.append(generator)

# Add the loads: one Load per row of the load forecast array
load_data = data.load
for i in range(load_data['p_forecast'].shape[0]):
    load_forecast = load_data['p_forecast'][i]
    load = Load(
        name=f'load_{i + 1:02d}',
        # The load starts at its forecast, which is also its upper bound.
        value=load_forecast,
        lower_bound=np.zeros(load_forecast.shape),
        upper_bound=load_forecast,
        cost=np.ones(load_forecast.shape),
        cost_cut=load_data['cost_cut'][i],
        cost_reduce=load_data['cost_reduce'][i],
        cost_ens=load_data['cost_ens'][i],
    )
    resources.append(load)
    
# Add the storage: one Storage per row of the charge-limit array
storage_data = data.storage
for i in range(storage_data['p_charge_limit'].shape[0]):
    # Size the bound/cost vectors from this storage's own charge-limit row,
    # the same way the generator, load and vehicle loops size theirs.
    # Previously they were sized with shape[0] (the number of storages) and
    # the upper bound used the un-indexed capacity array, so every storage
    # received the capacities of *all* storages as its bound.
    # NOTE(review): assumes p_charge_limit is 2-D (storage x period) -- confirm.
    storage_horizon = np.shape(storage_data['p_charge_limit'][i])

    resources.append(Storage(name='storage_{:02d}'.format(i+1),
                             value=storage_data['initial_state'][i],
                             lower_bound=np.ones(storage_horizon) * storage_data['energy_min_percentage'][i],
                             upper_bound=np.ones(storage_horizon) * storage_data['energy_capacity'][i],
                             cost=np.ones(storage_horizon),
                             cost_discharge=storage_data['discharge_price'][i],
                             cost_charge=storage_data['charge_price'][i],
                             capacity_max=storage_data['energy_capacity'][i],
                             capacity_min=storage_data['energy_min_percentage'][i],
                             initial_charge=storage_data['initial_state'][i],
                             discharge_efficiency=storage_data['discharge_efficiency'][i],
                             discharge_max=storage_data['p_discharge_limit'][i],
                             charge_efficiency=storage_data['charge_efficiency'][i],
                             charge_max=storage_data['p_charge_limit'][i],
                             # NOTE(review): the same three-element array is passed to
                             # every storage -- confirm whether one value per storage
                             # was intended.
                             capital_cost=np.array([0.05250, 0.10500, 0.01575])))
    
# Define the Electric Vehicles: one Vehicle per entry of e_capacity_max
vehicle_data = data.vehicle
for i in np.arange(vehicle_data['e_capacity_max'].shape[0]):
    ev_horizon = vehicle_data['schedule_charge'][i].shape
    ev_capacity = vehicle_data['e_capacity_max'][i]
    ev_min_soc = vehicle_data['min_technical_soc'][i]

    new_ev = Vehicle(
        name='ev_{:02d}'.format(i + 1),
        # Initial value, initial charge and minimum charge are all the
        # minimum technical SoC multiplied by the maximum capacity.
        value=ev_min_soc * ev_capacity,
        lower_bound=np.ones(ev_horizon) * ev_min_soc * ev_capacity,
        upper_bound=np.ones(ev_horizon) * ev_capacity,
        cost=np.zeros(ev_horizon),
        # Only the first price column is read and repeated over the horizon.
        cost_discharge=np.ones(ev_horizon) * vehicle_data['discharge_price'][i, 0],
        cost_charge=np.ones(ev_horizon) * vehicle_data['charge_price'][i, 0],
        capacity_max=ev_capacity,
        initial_charge=ev_min_soc * ev_capacity,
        min_charge=ev_min_soc * ev_capacity,
        # NOTE(review): discharge efficiency is read from the
        # 'charge_efficiency' key -- looks like a copy-paste slip; confirm
        # whether the parser exposes a separate discharge efficiency.
        discharge_efficiency=vehicle_data['charge_efficiency'][i],
        charge_efficiency=vehicle_data['charge_efficiency'][i],
        schedule_connected=vehicle_data['schedule'][i],
        schedule_discharge=vehicle_data['schedule_discharge'][i],
        schedule_charge=vehicle_data['schedule_charge'][i],
        schedule_requirement_soc=vehicle_data['schedule_departure_soc'][i],
        schedule_arrival_soc=vehicle_data['schedule_arrival_soc'][i],
    )
    resources.append(new_ev)
    
# Append Aggregator
# All zero-initialised vectors take their shape from the first load forecast
# row; every peer quantity is read from row 0 of the peers data.
aggregator_horizon = data.load['p_forecast'][0, :].shape
peer_data = data.peers

aggregator = Aggregator(
    name='aggregator',
    value=np.zeros(aggregator_horizon),
    lower_bound=np.zeros(aggregator_horizon),
    upper_bound=peer_data['import_contracted_p_max'][0, :],
    cost=peer_data['buy_price'][0, :],
    imports=np.zeros(aggregator_horizon),
    exports=np.zeros(aggregator_horizon),
    import_cost=peer_data['buy_price'][0, :],
    export_cost=peer_data['sell_price'][0, :],
    import_max=peer_data['import_contracted_p_max'][0, :],
    export_max=peer_data['export_contracted_p_max'][0, :],
)
resources.append(aggregator)

In [4]:
# Create the environment and check if everything is ok

temp_env = EnergyCommunitySequentialV0(resources=resources,
                                       import_penalty=1000,
                                       export_penalty=1000,
                                       storage_action_penalty=500,
                                       ev_action_penalty=500,
                                       ev_requirement_penalty=700,
                                       balance_penalty=1000)
temp_env.reset()

# Initial "nothing finished yet" flags. These are two independent dicts: the
# previous chained assignment bound both names to the *same* dict object.
terminations = {agent: False for agent in temp_env.agents}
terminations['__all__'] = False
truncations = dict(terminations)

# Step with randomly sampled actions until the environment reports that the
# episode is terminated or truncated for all agents.
while not (terminations['__all__'] or truncations['__all__']):
    actions = temp_env.action_space.sample()
    observations, rewards, terminations, truncations, infos = temp_env.step(actions)

print('Terminated: {}'.format(terminations['__all__']))

exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists action
exists

In [5]:
# Check for environment errors
# Runs RLlib's check_env utility against the environment instance built above.

ray.rllib.utils.check_env(env=temp_env)

exists action


In [6]:
# Aux function to assign policies

def assign_policies(env):
    """Build one policy entry per agent of ``env``.

    Args:
        env: Object exposing ``agents`` (iterable of agent ids) and
            ``observation_space`` / ``action_space`` indexable by agent id.

    Returns:
        dict mapping each agent id to the tuple
        ``(None, observation_space[agent], action_space[agent], {})``.
    """
    return {
        agent_id: (None,
                   env.observation_space[agent_id],
                   env.action_space[agent_id],
                   {})
        for agent_id in env.agents
    }

# policies = assign_policies(env=temp_env)

In [7]:
# Create the policies dictionary

def assign_group_policies(env):
    """Build one shared policy entry per resource group.

    Each group takes its observation and action space from a single
    representative agent (the ``*_01`` agent of the group, or the aggregator
    itself).

    Args:
        env: Object whose ``observation_space`` and ``action_space`` are
            indexable by the agent ids ``'generator_01'``, ``'storage_01'``,
            ``'ev_01'`` and ``'aggregator'``.

    Returns:
        dict with keys ``'generator'``, ``'storage'``, ``'ev'`` and
        ``'aggregator'``, each mapped to
        ``(None, observation_space, action_space, {})``.
    """
    # (policy name, agent whose spaces define that policy)
    representatives = (
        ('generator', 'generator_01'),
        ('storage', 'storage_01'),
        ('ev', 'ev_01'),
        ('aggregator', 'aggregator'),
    )

    policies = {}
    for group, agent_id in representatives:
        policies[group] = (None,
                           env.observation_space[agent_id],
                           env.action_space[agent_id],
                           {})
    return policies

# One policy per agent: the keys are the agent ids, which is what the identity
# policy_mapping_fn in the PPO config cell looks up. assign_group_policies is
# the shared-policy-per-group alternative.
policies = assign_policies(env=temp_env)

In [None]:
# Create an RLlib Algorithm instance from a PPOConfig to learn how to
# act in the above environment.

from ray.tune import register_env

# Restart Ray so the cell can be re-run on a kernel that already initialised it.
ray.shutdown()
ray.init()

# Penalty settings forwarded to the environment constructor.
env_penalties = dict(import_penalty=1000,
                     export_penalty=1000,
                     storage_action_penalty=500,
                     ev_action_penalty=500,
                     ev_requirement_penalty=700,
                     balance_penalty=1000)

# Register a factory under the name referenced by the config below. The
# `config` argument supplied by RLlib is ignored; the environment is always
# built from the notebook's `resources` list.
register_env("EC_Multi",
             lambda config: EnergyCommunitySequentialV0(resources=resources,
                                                        **env_penalties))

config = (
    PPOConfig()
    .environment(env="EC_Multi")
    .training(
        train_batch_size=240,
        lr=2e-4,
        gamma=0.99,
        # entropy_coeff=0.05,
        # kl_coeff=0.0,
    )
    .framework('torch')  # alternative: 'tf2', eager_tracing=False
    .rollouts(batch_mode='complete_episodes',
              num_rollout_workers=1,
              rollout_fragment_length=24)
    # One policy per agent: the mapping function returns the agent id itself,
    # so `policies` must be keyed by agent id.
    .multi_agent(policies=policies,
                 policy_mapping_fn=(lambda agent_id, episode, worker, **kwargs: agent_id))
    # Shared-policy alternative (needs `policies` keyed by group name):
    #.multi_agent(policies=policies,
    #             policy_mapping_fn=(lambda agent_id, episode, worker, **kwargs:
    #             'generator' if agent_id.startswith('generator') else
    #             'storage' if agent_id.startswith('storage') else
    #             'ev' if agent_id.startswith('ev') else
    #             'aggregator'))
)

# Use the config's `build()` method to construct a PPO object.
algo = config.build()

# Train for at most `max_iterations` iterations, reporting the mean episode
# reward after each one, and stop early once it exceeds `reward_target`.
max_iterations = 20
reward_target = -50.0

checkpoint = None
for i in range(max_iterations):
    results = algo.train()
    print(f"Iter: {i}; avg. reward={results['episode_reward_mean']}")

    # Save the checkpoint to disk *before* testing the stop criterion, so the
    # iteration that reaches the target is persisted too. Previously the
    # `break` skipped the save, losing the final model and leaving
    # `checkpoint` as None when the target was hit on the first iteration.
    checkpoint = algo.save()
    print("checkpoints saved at", checkpoint)

    if results['episode_reward_mean'] > reward_target:
        break