In [1]:
import gymnasium as gym
from stable_baselines3 import PPO, DDPG
from sb3_contrib import TRPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback
import pkg_resources



from ev2gym.models.ev2gym_env import EV2Gym
from ev2gym.rl_agent.reward import profit_maximization
from ev2gym.rl_agent.state import arrival_prices_flex
from ev2gym.utilities.callbacks import SaveBestReward
from ev2gym.utilities.evaluators import evaluate_model


import os

# Output locations used by the cells below: `run_name` is the base directory
# for per-algorithm logs and saved models, `tsb_dir` is passed to the agents
# as their TensorBoard log directory.
run_name = "./models/flex_03"
tsb_dir = "./runs/flex_03"

# We will use an example configuration file shipped inside the ev2gym package.
# pkg_resources resource names must be relative, '/'-separated paths: a leading
# '/' is treated as an absolute path and emits a DeprecationWarning ("will
# raise exceptions in a future release"). The resolved file path is identical
# without it.
config_resource = "example_config_files/testPST.yaml"
config_file = pkg_resources.resource_filename('ev2gym', config_resource)

# Creating the environment.
# Rendering, plot saving and replay saving are all switched off; the state and
# reward functions are the ones imported from ev2gym.rl_agent above.
# flex_multiplier=0.3 matches the "flex_03" suffix of the output directories.
env = EV2Gym(config_file,
             render_mode=False,
             save_plots=False,
             save_replay=False,
             state_function=arrival_prices_flex,
             reward_function=profit_maximization,
             flex_multiplier=0.3)

  import pkg_resources
  config_file = pkg_resources.resource_filename('ev2gym', config_file)


## DDPG

In [2]:
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
import numpy as np

# Create log dir
log_dir = run_name+"/DDPG/"
os.makedirs(log_dir, exist_ok=True)
# Wrap the *base* environment. `env` may already be a Monitor (this cell was
# re-run, or another algorithm's cell ran first); wrapping it again would nest
# Monitors, and the inner one would also record this run's episodes into its
# own, older log directory. `.unwrapped` makes this cell idempotent.
env = Monitor(env.unwrapped, log_dir)

# Evaluate every 2500 steps over 10 deterministic episodes; the best model and
# the evaluation log are both written to `log_dir`.
# NOTE(review): evaluation runs on the same `env` instance used for training.
# SB3 recommends a separate evaluation environment - consider building a
# second EV2Gym for the callback.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,n_eval_episodes=10,
                             deterministic=True, render=False,verbose=0)

# Add Ornstein-Uhlenbeck noise for exploration (zero mean, sigma 0.1 per
# action dimension)
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

# Initialize the RL agent and train it; TensorBoard logs go to `tsb_dir`
model = DDPG("MlpPolicy",env,learning_rate = 1e-5,action_noise=action_noise,learning_starts=200,tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_000_000,callback=eval_callback)

<stable_baselines3.ddpg.ddpg.DDPG at 0x7eb26d98f950>

In [3]:
# Locate the DDPG run directory and load the best model stored in it
log_dir = f"{run_name}/DDPG/"
best_model_path = log_dir + "best_model.zip"
model = DDPG.load(best_model_path)
# Attach the notebook's environment to the loaded model
model.set_env(env)

# Custom model evaluator (second argument is presumably the number of
# evaluation episodes - confirm against ev2gym.utilities.evaluators)
evaluate_model(model, 100)

total_ev_served:  44.98
total_profits:  -33.489596809091246
real_profits (no flexibility):  -218.14663983851833
Up_flexibility (kWh):  1963.5700537701966
Down_flexibility (kWh):  950.7790969254122
total_energy_charged:  1042.4141537210721
average_user_satisfaction:  0.9646574772305226
energy_user_satisfaction:  100.0
reward:  -21.63537718


## TD3

In [4]:
from stable_baselines3 import TD3

# Create log dir
log_dir = run_name+"/TD3/"
os.makedirs(log_dir, exist_ok=True)
# Wrap the *base* environment. After the DDPG cell `env` is already a Monitor
# bound to the DDPG log directory; wrapping it again would nest Monitors, and
# the inner one would also record TD3 episodes into the DDPG logs.
# `.unwrapped` keeps exactly one Monitor, pointing at this run's directory.
env = Monitor(env.unwrapped, log_dir)

# Evaluate every 2500 steps over 10 deterministic episodes; the best model and
# the evaluation log are both written to `log_dir`.
# NOTE(review): evaluation runs on the same `env` instance used for training.
# SB3 recommends a separate evaluation environment - consider building a
# second EV2Gym for the callback.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,n_eval_episodes=10,
                             deterministic=True, render=False,verbose=0)

# Add Ornstein-Uhlenbeck noise for exploration (zero mean, sigma 0.1 per
# action dimension)
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

# Initialize the RL agent and train it; TensorBoard logs go to `tsb_dir`
model = TD3("MlpPolicy",env,learning_rate = 1e-5,action_noise=action_noise,learning_starts=200,tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_000_000,callback=eval_callback)

<stable_baselines3.td3.td3.TD3 at 0x7eb240293770>

In [5]:
# Locate the TD3 run directory and load the best model stored in it
log_dir = f"{run_name}/TD3/"
best_model_path = log_dir + "best_model.zip"
model = TD3.load(best_model_path)
# Attach the notebook's environment to the loaded model
model.set_env(env)

# Custom model evaluator (second argument is presumably the number of
# evaluation episodes - confirm against ev2gym.utilities.evaluators)
evaluate_model(model, 100)

total_ev_served:  44.65
total_profits:  -23.144245878367748
real_profits (no flexibility):  -215.75249369313846
Up_flexibility (kWh):  2084.2577477627856
Down_flexibility (kWh):  952.4924312823155
total_energy_charged:  1040.2395835589666
average_user_satisfaction:  0.966474888888889
energy_user_satisfaction:  100.0
reward:  -3.71729601


## PPO

In [2]:
# Create log dir
log_dir = run_name+"/PPO/"
os.makedirs(log_dir, exist_ok=True)
# Wrap the *base* environment. `env` may already be a Monitor (this cell was
# re-run, or another algorithm's cell ran first); wrapping it again would nest
# Monitors, and the inner one would also record this run's episodes into its
# own, older log directory. `.unwrapped` makes this cell idempotent.
env = Monitor(env.unwrapped, log_dir)

# Evaluate every 2500 steps with deterministic actions; the best model and the
# evaluation log are both written to `log_dir`. n_eval_episodes is left at the
# library default here, unlike the DDPG/TD3 cells which pass 10.
# NOTE(review): evaluation runs on the same `env` instance used for training.
# SB3 recommends a separate evaluation environment - consider building a
# second EV2Gym for the callback.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,
                             deterministic=True, render=False,verbose=0)


# Initialize the RL agent (default hyperparameters) and train it; TensorBoard
# logs go to `tsb_dir`
model = PPO("MlpPolicy", env,tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_500_000,callback=eval_callback)

<stable_baselines3.ppo.ppo.PPO at 0x7d44c9329640>

In [3]:
# Locate the PPO run directory and load the best model stored in it
log_dir = f"{run_name}/PPO/"
best_model_path = log_dir + "best_model.zip"
model = PPO.load(best_model_path)
# Attach the notebook's environment to the loaded model
model.set_env(env)

# Custom model evaluator (second argument is presumably the number of
# evaluation episodes - confirm against ev2gym.utilities.evaluators)
evaluate_model(model, 100)

total_ev_served:  44.41
total_profits:  52.45044715862293
real_profits (no flexibility):  -194.01925881888133
Up_flexibility (kWh):  3052.449722735067
Down_flexibility (kWh):  866.9115167475363
total_energy_charged:  937.0015779382924
average_user_satisfaction:  0.9247097253325005
energy_user_satisfaction:  100.0
reward:  1.2815920400000003


## TRPO

In [4]:
# Create log dir
log_dir = run_name+"/TRPO/"
os.makedirs(log_dir, exist_ok=True)
# Wrap the *base* environment. `env` may already be a Monitor (this cell was
# re-run, or another algorithm's cell ran first); wrapping it again would nest
# Monitors, and the inner one would also record this run's episodes into its
# own, older log directory. `.unwrapped` makes this cell idempotent.
env = Monitor(env.unwrapped, log_dir)

# Evaluate every 2500 steps with deterministic actions; the best model and the
# evaluation log are both written to `log_dir`. n_eval_episodes is left at the
# library default here, unlike the DDPG/TD3 cells which pass 10.
# NOTE(review): evaluation runs on the same `env` instance used for training.
# SB3 recommends a separate evaluation environment - consider building a
# second EV2Gym for the callback.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,
                             deterministic=True, render=False,verbose=0)

# Initialize the RL agent (default hyperparameters) and train it; TensorBoard
# logs go to `tsb_dir`
model = TRPO("MlpPolicy", env,tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_500_000,callback=eval_callback)

<sb3_contrib.trpo.trpo.TRPO at 0x7d449797d6a0>

In [5]:
from ev2gym.utilities.evaluators import evaluate_model

# Load the best model and attach the environment.
# `log_dir` is set explicitly, as in the other evaluation cells, so this cell
# does not depend on which training cell happened to run last.
log_dir = run_name+"/TRPO/"
model = TRPO.load(log_dir+"best_model.zip")
model.set_env(env)

# Custom model evaluator (second argument is presumably the number of
# evaluation episodes - confirm against ev2gym.utilities.evaluators)
evaluate_model(model,100)

total_ev_served:  45.04
total_profits:  44.61334263057168
real_profits (no flexibility):  -201.4307806601092
Up_flexibility (kWh):  3008.4717328160677
Down_flexibility (kWh):  903.0442402686803
total_energy_charged:  975.7644871200686
average_user_satisfaction:  0.9356206210256977
energy_user_satisfaction:  100.0
reward:  17.51032253
