In [1]:
import gymnasium as gym
from stable_baselines3 import PPO, DDPG
from sb3_contrib import TRPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback
import pkg_resources



from ev2gym.models.ev2gym_env import EV2Gym
from ev2gym.rl_agent.reward import profit_maximization
from ev2gym.rl_agent.state import arrival_prices_flex
from ev2gym.utilities.callbacks import SaveBestReward
from ev2gym.utilities.evaluators import evaluate_model


import os

# Output locations: `run_name` is the root under which each algorithm section
# creates its own log/checkpoint sub-directory, `tsb_dir` is passed to the
# agents as their TensorBoard log directory.
run_name = "./models/flex_05"
tsb_dir = "./runs/flex_05"

# We use an example configuration file that ships inside the ev2gym package.
# The resource name must be a "/"-separated path *relative* to the package:
# pkg_resources deprecates absolute resource paths (leading "/"), and the
# relative form resolves to exactly the same file.
config_resource = "example_config_files/testPST.yaml"
config_file = pkg_resources.resource_filename('ev2gym', config_resource)

# Creating the environment (rendering, plot saving and replay saving disabled)
env = EV2Gym(
    config_file,
    render_mode=False,
    save_plots=False,
    save_replay=False,
    state_function=arrival_prices_flex,
    reward_function=profit_maximization,
    flex_multiplier=0.5,
)

  import pkg_resources
  config_file = pkg_resources.resource_filename('ev2gym', config_file)


## DDPG

In [2]:
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
import numpy as np

# Train DDPG; monitor logs, evaluation logs and the best checkpoint all go
# to <run_name>/DDPG/.
log_dir = run_name+"/DDPG/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the base environment, not whatever `env` currently refers to. Every
# training section rebinds `env` to a Monitor, so `Monitor(env, ...)` would
# stack a new Monitor on top of the previous one whenever this cell is re-run
# or another section has already executed. `unwrapped` strips any existing
# wrappers first, which makes the cell idempotent.
env = Monitor(env.unwrapped, log_dir)

# Periodic evaluation (every 2500 steps, 10 episodes); the best model is
# saved into log_dir.
# NOTE(review): the evaluation callback receives the same env instance that
# the agent trains on. Stable-Baselines3 recommends a separate evaluation
# environment - consider building a second EV2Gym instance for it.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500, n_eval_episodes=10,
                             deterministic=True, render=False, verbose=0)

# Add Ornstein-Uhlenbeck noise for exploration (zero mean, sigma 0.1 per action)
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                            sigma=0.1 * np.ones(n_actions))

# Initialize the RL agent
model = DDPG("MlpPolicy", env, learning_rate=1e-5, action_noise=action_noise,
             learning_starts=200, tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_000_000, callback=eval_callback)

<stable_baselines3.ddpg.ddpg.DDPG at 0x7dfd1a7cb560>

In [3]:
# Reload the checkpoint stored as "best_model.zip" in the DDPG log directory
# and attach the environment to it.
log_dir = f"{run_name}/DDPG/"
model = DDPG.load(f"{log_dir}best_model.zip")
model.set_env(env)

# Run the ev2gym evaluator on the restored agent.
# (the second argument is presumably the number of evaluation runs -
#  confirm against ev2gym.utilities.evaluators)
evaluate_model(model, 100)

total_ev_served:  44.87
total_profits:  147.5350300887514
real_profits (no flexibility):  -203.53371897611666
Up_flexibility (kWh):  2439.569143889163
Down_flexibility (kWh):  894.3344429837479
total_energy_charged:  984.1995317676939
average_user_satisfaction:  0.9405596788812971
energy_user_satisfaction:  100.0
reward:  96.14054383000001


## TD3

In [4]:
from stable_baselines3 import TD3

# Train TD3; monitor logs, evaluation logs and the best checkpoint all go
# to <run_name>/TD3/.
# This cell uses `np` and `OrnsteinUhlenbeckActionNoise`, which are imported
# in the DDPG section.
log_dir = run_name+"/TD3/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the base environment, not whatever `env` currently refers to. Every
# training section rebinds `env` to a Monitor, so `Monitor(env, ...)` would
# stack a new Monitor on top of the one created by an earlier section (or by
# a previous run of this cell). `unwrapped` strips any existing wrappers first.
env = Monitor(env.unwrapped, log_dir)

# Periodic evaluation (every 2500 steps, 10 episodes); the best model is
# saved into log_dir.
# NOTE(review): the evaluation callback receives the same env instance that
# the agent trains on. Stable-Baselines3 recommends a separate evaluation
# environment - consider building a second EV2Gym instance for it.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500, n_eval_episodes=10,
                             deterministic=True, render=False, verbose=0)

# Add Ornstein-Uhlenbeck noise for exploration (zero mean, sigma 0.1 per action)
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                            sigma=0.1 * np.ones(n_actions))

# Initialize the RL agent
model = TD3("MlpPolicy", env, learning_rate=1e-5, action_noise=action_noise,
            learning_starts=200, tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_000_000, callback=eval_callback)

<stable_baselines3.td3.td3.TD3 at 0x7dfce51bf590>

In [5]:
# Reload the checkpoint stored as "best_model.zip" in the TD3 log directory
# and attach the environment to it.
log_dir = f"{run_name}/TD3/"
model = TD3.load(f"{log_dir}best_model.zip")
model.set_env(env)

# Run the ev2gym evaluator on the restored agent.
# (the second argument is presumably the number of evaluation runs -
#  confirm against ev2gym.utilities.evaluators)
evaluate_model(model, 100)

total_ev_served:  44.4
total_profits:  108.72542975706169
real_profits (no flexibility):  -214.7031065156672
Up_flexibility (kWh):  2115.265495372967
Down_flexibility (kWh):  952.5311170475375
total_energy_charged:  1038.6634555483638
average_user_satisfaction:  0.9684406078485149
energy_user_satisfaction:  100.0
reward:  128.32976156


## PPO

In [2]:
# Train PPO; monitor logs, evaluation logs and the best checkpoint all go
# to <run_name>/PPO/.
log_dir = run_name+"/PPO/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the base environment, not whatever `env` currently refers to. Every
# training section rebinds `env` to a Monitor, so `Monitor(env, ...)` would
# stack a new Monitor on top of the previous one whenever this cell is re-run
# or another section has already executed. `unwrapped` strips any existing
# wrappers first, which makes the cell idempotent.
env = Monitor(env.unwrapped, log_dir)

# Periodic evaluation every 2500 steps; the best model is saved into log_dir.
# n_eval_episodes is left at the EvalCallback default here (the DDPG and TD3
# sections pass 10 explicitly).
# NOTE(review): the evaluation callback receives the same env instance that
# the agent trains on. Stable-Baselines3 recommends a separate evaluation
# environment - consider building a second EV2Gym instance for it.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,
                             deterministic=True, render=False, verbose=0)

# Initialize the RL agent (PPO hyper-parameters left at library defaults)
model = PPO("MlpPolicy", env, tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_500_000, callback=eval_callback)

<stable_baselines3.ppo.ppo.PPO at 0x72f8fc7206b0>

In [3]:
# Reload the checkpoint stored as "best_model.zip" in the PPO log directory
# and attach the environment to it.
log_dir = f"{run_name}/PPO/"
model = PPO.load(f"{log_dir}best_model.zip")
model.set_env(env)

# Run the ev2gym evaluator on the restored agent.
# (the second argument is presumably the number of evaluation runs -
#  confirm against ev2gym.utilities.evaluators)
evaluate_model(model, 100)

total_ev_served:  44.76
total_profits:  231.85351238710234
real_profits (no flexibility):  -189.50799092606067
Up_flexibility (kWh):  3172.2630174070828
Down_flexibility (kWh):  845.108280012721
total_energy_charged:  916.9209984109807
average_user_satisfaction:  0.9112641074779606
energy_user_satisfaction:  100.0
reward:  149.00142169999998


## TRPO

In [4]:
# Train TRPO; monitor logs, evaluation logs and the best checkpoint all go
# to <run_name>/TRPO/.
log_dir = run_name+"/TRPO/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the base environment, not whatever `env` currently refers to. Every
# training section rebinds `env` to a Monitor, so `Monitor(env, ...)` would
# stack a new Monitor on top of the one created by an earlier section (or by
# a previous run of this cell). `unwrapped` strips any existing wrappers first.
env = Monitor(env.unwrapped, log_dir)

# Periodic evaluation every 2500 steps; the best model is saved into log_dir.
# n_eval_episodes is left at the EvalCallback default here (the DDPG and TD3
# sections pass 10 explicitly).
# NOTE(review): the evaluation callback receives the same env instance that
# the agent trains on. Stable-Baselines3 recommends a separate evaluation
# environment - consider building a second EV2Gym instance for it.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,
                             deterministic=True, render=False, verbose=0)

# Initialize the RL agent (TRPO hyper-parameters left at library defaults)
model = TRPO("MlpPolicy", env, tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_500_000, callback=eval_callback)

<sb3_contrib.trpo.trpo.TRPO at 0x72f901cc17c0>

In [5]:
from ev2gym.utilities.evaluators import evaluate_model

# Load the best model and attach the environment.
# log_dir is set explicitly, as in the other evaluation cells, so this cell
# loads the TRPO checkpoint regardless of which section ran last.
log_dir = run_name+"/TRPO/"
model = TRPO.load(log_dir+"best_model.zip")
model.set_env(env)

# Custom model evaluator
# (the second argument is presumably the number of evaluation runs -
#  confirm against ev2gym.utilities.evaluators)
evaluate_model(model,100)

total_ev_served:  44.95
total_profits:  229.01769186926097
real_profits (no flexibility):  -188.15350853807257
Up_flexibility (kWh):  3137.519440908586
Down_flexibility (kWh):  839.472902444527
total_energy_charged:  910.0627421210837
average_user_satisfaction:  0.9078805500253152
energy_user_satisfaction:  100.0
reward:  129.56615868
