In [1]:
import gymnasium as gym
from stable_baselines3 import PPO, DDPG
from sb3_contrib import TRPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback
import pkg_resources



from ev2gym.models.ev2gym_env import EV2Gym
from ev2gym.rl_agent.reward import profit_maximization
from ev2gym.rl_agent.state import arrival_prices
from ev2gym.utilities.callbacks import SaveBestReward
from ev2gym.utilities.evaluators import evaluate_model


import os

# Output locations shared by every experiment below:
#   run_name -> root under which each algorithm gets its own sub-directory
#   tsb_dir  -> directory handed to the agents as `tensorboard_log`
run_name = "./models/flex_00"
tsb_dir = "./runs/flex_00"

# Resolve the example configuration bundled inside the installed `ev2gym`
# package to a path on disk.
config_file = pkg_resources.resource_filename(
    'ev2gym',
    "/example_config_files/testPST.yaml",
)

# Build the environment used by all cells below: no rendering, no plots and no
# replay files; observations come from `arrival_prices`, rewards from
# `profit_maximization`, and the flexibility multiplier is set to zero.
env = EV2Gym(
    config_file,
    render_mode=False,
    save_plots=False,
    save_replay=False,
    state_function=arrival_prices,
    reward_function=profit_maximization,
    flex_multiplier=0.0,
)

  import pkg_resources
  config_file = pkg_resources.resource_filename('ev2gym', config_file)


## DDPG

In [2]:
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
import numpy as np

# Directory that receives the Monitor log, the evaluation results and the
# best-model checkpoint for this algorithm.
log_dir = run_name+"/DDPG/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the bare environment, not whatever `env` currently is: `env` is re-bound
# to a Monitor by every training cell, so wrapping it directly would stack one
# more Monitor layer each time a training cell is (re-)run.
env = Monitor(env.unwrapped, log_dir)

# Every 2500 callback calls, evaluate the deterministic policy for 10 episodes
# and save the best-scoring model into `log_dir`.
# NOTE(review): the callback evaluates on the same `env` object that is used
# for training — consider a dedicated evaluation environment.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500, n_eval_episodes=10,
                             deterministic=True, render=False, verbose=0)

# Add Ornstein-Uhlenbeck noise for exploration (zero mean, sigma 0.1 per action)
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                            sigma=0.1 * np.ones(n_actions))

# Initialize the RL agent and train it
model = DDPG("MlpPolicy", env, learning_rate=1e-5, action_noise=action_noise,
             learning_starts=200, tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_000_000, callback=eval_callback)

<stable_baselines3.ddpg.ddpg.DDPG at 0x7063ca8855e0>

In [3]:
# Restore the best-model checkpoint from the DDPG log directory and attach the
# current environment to the restored agent.
log_dir = run_name + "/DDPG/"
best_model_path = log_dir + "best_model.zip"
model = DDPG.load(best_model_path)
model.set_env(env)

# Run ev2gym's evaluation helper on the restored agent
# (second argument is presumably the number of evaluation runs — TODO confirm).
evaluate_model(model, 100)

total_ev_served:  44.9
total_profits:  -203.21887722810672
real_profits (no flexibility):  -203.21887722810672
Up_flexibility (kWh):  1875.4348911719205
Down_flexibility (kWh):  884.7603520094126
total_energy_charged:  959.8309791925132
average_user_satisfaction:  0.9311955695849804
energy_user_satisfaction:  100.0
reward:  -292.83402923


## TD3

In [4]:
from stable_baselines3 import TD3

# Directory that receives the Monitor log, the evaluation results and the
# best-model checkpoint for this algorithm.
log_dir = run_name+"/TD3/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the bare environment, not whatever `env` currently is: by the time this
# cell runs `env` may already be a Monitor created by another training cell,
# and stacking a second Monitor on top would make this run's episodes get
# recorded by the earlier algorithm's Monitor as well.
env = Monitor(env.unwrapped, log_dir)

# Every 2500 callback calls, evaluate the deterministic policy for 10 episodes
# and save the best-scoring model into `log_dir`.
# NOTE(review): the callback evaluates on the same `env` object that is used
# for training — consider a dedicated evaluation environment.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500, n_eval_episodes=10,
                             deterministic=True, render=False, verbose=0)

# Add Ornstein-Uhlenbeck noise for exploration (zero mean, sigma 0.1 per action)
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                            sigma=0.1 * np.ones(n_actions))

# Initialize the RL agent and train it
model = TD3("MlpPolicy", env, learning_rate=1e-5, action_noise=action_noise,
            learning_starts=200, tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_000_000, callback=eval_callback)

<stable_baselines3.td3.td3.TD3 at 0x706396c9be00>

In [5]:
# Restore the best-model checkpoint from the TD3 log directory and attach the
# current environment to the restored agent.
log_dir = run_name + "/TD3/"
best_model_path = log_dir + "best_model.zip"
model = TD3.load(best_model_path)
model.set_env(env)

# Run ev2gym's evaluation helper on the restored agent
# (second argument is presumably the number of evaluation runs — TODO confirm).
evaluate_model(model, 100)

total_ev_served:  45.07
total_profits:  -213.25209722683567
real_profits (no flexibility):  -213.25209722683567
Up_flexibility (kWh):  1737.410875762598
Down_flexibility (kWh):  932.805482666069
total_energy_charged:  1004.0221735086135
average_user_satisfaction:  0.9487971666666666
energy_user_satisfaction:  100.0
reward:  -251.02348299000002


## PPO

In [6]:
# Directory that receives the Monitor log, the evaluation results and the
# best-model checkpoint for this algorithm.
log_dir = run_name+"/PPO/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the bare environment, not whatever `env` currently is: by the time this
# cell runs `env` may already be a Monitor created by another training cell,
# and stacking another Monitor on top would make this run's episodes get
# recorded by the earlier algorithms' Monitors as well.
env = Monitor(env.unwrapped, log_dir)

# Every 2500 callback calls, evaluate the deterministic policy (callback's
# default number of episodes) and save the best-scoring model into `log_dir`.
# NOTE(review): the callback evaluates on the same `env` object that is used
# for training — consider a dedicated evaluation environment.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,
                             deterministic=True, render=False, verbose=0)

# Initialize the RL agent (library-default hyperparameters) and train it
model = PPO("MlpPolicy", env, tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_500_000, callback=eval_callback)

<stable_baselines3.ppo.ppo.PPO at 0x706396c9a4b0>

In [7]:
# Restore the best-model checkpoint from the PPO log directory and attach the
# current environment to the restored agent.
log_dir = run_name + "/PPO/"
best_model_path = log_dir + "best_model.zip"
model = PPO.load(best_model_path)
model.set_env(env)

# Run ev2gym's evaluation helper on the restored agent
# (second argument is presumably the number of evaluation runs — TODO confirm).
evaluate_model(model, 100)

total_ev_served:  44.77
total_profits:  -205.55394716056148
real_profits (no flexibility):  -205.5539471605615
Up_flexibility (kWh):  2406.4221983928355
Down_flexibility (kWh):  910.3036013909294
total_energy_charged:  981.1211900443396
average_user_satisfaction:  0.9418324128351421
energy_user_satisfaction:  100.0
reward:  -253.45348464999998


## TRPO

In [8]:
# Directory that receives the Monitor log, the evaluation results and the
# best-model checkpoint for this algorithm.
log_dir = run_name+"/TRPO/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the bare environment, not whatever `env` currently is: by the time this
# cell runs `env` may already be a Monitor created by another training cell,
# and stacking another Monitor on top would make this run's episodes get
# recorded by the earlier algorithms' Monitors as well.
env = Monitor(env.unwrapped, log_dir)

# Every 2500 callback calls, evaluate the deterministic policy (callback's
# default number of episodes) and save the best-scoring model into `log_dir`.
# NOTE(review): the callback evaluates on the same `env` object that is used
# for training — consider a dedicated evaluation environment.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,
                             deterministic=True, render=False, verbose=0)

# Initialize the RL agent (library-default hyperparameters) and train it
model = TRPO("MlpPolicy", env, tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_500_000, callback=eval_callback)

<sb3_contrib.trpo.trpo.TRPO at 0x7063ca8fcc50>

In [9]:
from ev2gym.utilities.evaluators import evaluate_model

# Load the best model and attach the environment.
# `log_dir` is set explicitly here (as in the other evaluation cells) so this
# cell always reads the TRPO checkpoint, regardless of which cell ran last.
log_dir = run_name+"/TRPO/"
model = TRPO.load(log_dir+"best_model.zip")
model.set_env(env)

# Custom model evaluator
# (second argument is presumably the number of evaluation runs — TODO confirm).
evaluate_model(model,100)

total_ev_served:  44.86
total_profits:  -166.92932795353715
real_profits (no flexibility):  -166.9293279535371
Up_flexibility (kWh):  3016.7304098454456
Down_flexibility (kWh):  732.0055808231156
total_energy_charged:  803.1130566853323
average_user_satisfaction:  0.8610583437047654
energy_user_satisfaction:  100.0
reward:  -445.13449409000003
