In [1]:
import gymnasium as gym
from stable_baselines3 import PPO, A2C, DDPG
from sb3_contrib import TRPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback
import pkg_resources



from ev2gym.models.ev2gym_env import EV2Gym
from ev2gym.rl_agent.reward import profit_maximization_old
from ev2gym.rl_agent.state import arrival_prices
from ev2gym.utilities.callbacks import SaveBestReward
from ev2gym.utilities.evaluators import evaluate_model


import os

# Output locations: `run_name` is the root for per-algorithm log/checkpoint
# directories, `tsb_dir` is handed to the agents as their TensorBoard log dir.
run_name = "./models/econ_public_old"
tsb_dir = "./runs/econ_public_old"

# We use an example configuration file shipped inside the ev2gym package.
# The resource name must be relative to the package (no leading "/"):
# pkg_resources deprecates absolute resource paths and emits a
# DeprecationWarning for them. The resolved file path is unchanged.
config_file = "example_config_files/PublicPST.yaml"
config_file = pkg_resources.resource_filename('ev2gym', config_file)

# Creating the environment: rendering, plot saving and replay saving are all
# switched off; state and reward come from the functions imported above.
env = EV2Gym(config_file,
             render_mode=False,
             save_plots=False,
             save_replay=False,
             state_function=arrival_prices,
             reward_function=profit_maximization_old,
             flex_multiplier=0.0)

  import pkg_resources
  config_file = pkg_resources.resource_filename('ev2gym', config_file)


## DDPG

In [2]:
# Create the DDPG log dir
log_dir = run_name+"/DDPG/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the bare simulator rather than `env` itself: `env` is rebound to a
# Monitor by every training cell, so wrapping it directly would stack
# Monitors on re-runs and in later cells, with the inner ones still pointing
# at earlier log locations.
env = Monitor(env.unwrapped, log_dir)

# Periodic evaluation; the best checkpoint and evaluation logs go to log_dir.
# NOTE(review): the training env is also passed as the evaluation env, so
# evaluation episodes run on the very env being trained on. A dedicated
# evaluation env is presumably safer -- confirm against the SB3 docs.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,n_eval_episodes=10,
                             deterministic=True, render=False,verbose=0)

# Initialize the RL agent
model = DDPG("MlpPolicy",env,learning_rate = 1e-5,learning_starts=200,tensorboard_log=tsb_dir)
model.learn(total_timesteps=100_000,callback=eval_callback)

<stable_baselines3.ddpg.ddpg.DDPG at 0x793236866480>

In [3]:
# Pin the DDPG directory explicitly instead of relying on whatever value the
# `log_dir` global currently holds: other training cells rebind it, so this
# cell would otherwise load another algorithm's checkpoint when run out of order.
log_dir = run_name+"/DDPG/"

# Load the best model saved during training and attach the environment
model = DDPG.load(log_dir+"best_model.zip")
model.set_env(env)

# Custom model evaluator (the 100 is presumably the number of evaluation
# episodes -- confirm against ev2gym.utilities.evaluators)
evaluate_model(model,100)

total_ev_served:  48.23
total_profits:  -101.28582761033849
real_profits (no flexibility):  -101.28582761033849
Up_flexibility (kWh):  4433.3512253783465
Down_flexibility (kWh):  400.24381895586953
total_energy_charged:  470.336973898633
average_user_satisfaction:  0.8774489905677373
energy_user_satisfaction:  100.0
reward:  -106.24679101


## PPO

In [4]:
# Create the PPO log dir.
# This assignment was missing, so `log_dir` still pointed at the previous
# algorithm's directory and PPO's best_model.zip, evaluation logs and monitor
# file overwrote the ones stored there.
log_dir = run_name+"/PPO/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the bare simulator rather than `env` itself: `env` is rebound to a
# Monitor by every training cell, so wrapping it directly would stack
# Monitors, with the inner ones still pointing at earlier log locations.
env = Monitor(env.unwrapped, log_dir)

# Periodic evaluation; the best checkpoint and evaluation logs go to log_dir.
# NOTE(review): the training env is also passed as the evaluation env, so
# evaluation episodes run on the very env being trained on. A dedicated
# evaluation env is presumably safer -- confirm against the SB3 docs.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,
                             deterministic=True, render=False,verbose=0)


# Initialize the RL agent
model = PPO("MlpPolicy", env,tensorboard_log=tsb_dir)
model.learn(total_timesteps=1_000_000,callback=eval_callback)

<stable_baselines3.ppo.ppo.PPO at 0x7931cf12bf80>

In [9]:
# Restore the best PPO checkpoint written during training, then attach the
# environment to the restored agent.
log_dir = f"{run_name}/PPO/"
best_model_path = f"{log_dir}best_model.zip"
model = PPO.load(best_model_path)
model.set_env(env)

# Run the custom evaluator (the 100 is presumably the number of evaluation
# episodes -- confirm against ev2gym.utilities.evaluators)
evaluate_model(model, 100)

total_ev_served:  47.78
total_profits:  -14.352098505076547
real_profits (no flexibility):  -14.352098505076547
Up_flexibility (kWh):  6319.869136975935
Down_flexibility (kWh):  54.76303417701002
total_energy_charged:  66.19514850404144
average_user_satisfaction:  0.7079861892316631
energy_user_satisfaction:  100.0
reward:  -31.09490499


## TRPO

In [5]:
# Create the TRPO log dir
log_dir = run_name+"/TRPO/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the bare simulator rather than `env` itself: `env` is rebound to a
# Monitor by every training cell, so wrapping it directly would stack
# Monitors, with the inner ones still pointing at earlier log locations.
env = Monitor(env.unwrapped, log_dir)

# Periodic evaluation; the best checkpoint and evaluation logs go to log_dir.
# NOTE(review): the training env is also passed as the evaluation env, so
# evaluation episodes run on the very env being trained on. A dedicated
# evaluation env is presumably safer -- confirm against the SB3 docs.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,
                             deterministic=True, render=False,verbose=0)

# Initialize the RL agent
model = TRPO("MlpPolicy", env,tensorboard_log=tsb_dir)
model.learn(total_timesteps=500_000,callback=eval_callback)

<sb3_contrib.trpo.trpo.TRPO at 0x7931cf1a3e90>

In [6]:
# `evaluate_model` is already imported in the notebook's first cell, so the
# duplicate mid-notebook import has been dropped.

# Pin the TRPO directory explicitly instead of relying on whatever value the
# `log_dir` global currently holds: other training cells rebind it, so this
# cell would otherwise load another algorithm's checkpoint when run out of order.
log_dir = run_name+"/TRPO/"

# Load the best model saved during training and attach the environment
model = TRPO.load(log_dir+"best_model.zip")
model.set_env(env)

# Custom model evaluator (the 100 is presumably the number of evaluation
# episodes -- confirm against ev2gym.utilities.evaluators)
evaluate_model(model,100)

total_ev_served:  47.72
total_profits:  -0.7786157839086616
real_profits (no flexibility):  -0.7786157839086616
Up_flexibility (kWh):  6611.702966931937
Down_flexibility (kWh):  1.5777462809163751
total_energy_charged:  3.209372956471762
average_user_satisfaction:  0.683489194889568
energy_user_satisfaction:  100.0
reward:  -19.71453162


## A2C

In [7]:
# Create the A2C log dir
log_dir = run_name+"/A2C/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the bare simulator rather than `env` itself: `env` is rebound to a
# Monitor by every training cell, so wrapping it directly would stack
# Monitors, with the inner ones still pointing at earlier log locations.
env = Monitor(env.unwrapped, log_dir)

# Periodic evaluation; the best checkpoint and evaluation logs go to log_dir.
# NOTE(review): the training env is also passed as the evaluation env, so
# evaluation episodes run on the very env being trained on. A dedicated
# evaluation env is presumably safer -- confirm against the SB3 docs.
eval_callback = EvalCallback(env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=2500,
                             deterministic=True, render=False,verbose=0)


# Initialize the RL agent
model = A2C("MlpPolicy", env,tensorboard_log=tsb_dir)
model.learn(total_timesteps=300_000,callback=eval_callback)

<stable_baselines3.a2c.a2c.A2C at 0x793330628c50>

In [8]:
# Pin the A2C directory explicitly instead of relying on whatever value the
# `log_dir` global currently holds: other training cells rebind it, so this
# cell would otherwise load another algorithm's checkpoint when run out of order.
log_dir = run_name+"/A2C/"

# Load the best model saved during training and attach the environment
model = A2C.load(log_dir+"best_model.zip")
model.set_env(env)

# Custom model evaluator (the 100 is presumably the number of evaluation
# episodes -- confirm against ev2gym.utilities.evaluators)
evaluate_model(model,100)

total_ev_served:  48.04
total_profits:  -9.032473151965185
real_profits (no flexibility):  -9.032473151965185
Up_flexibility (kWh):  6323.872558506583
Down_flexibility (kWh):  33.916457773240694
total_energy_charged:  40.644331336684026
average_user_satisfaction:  0.7013681138651993
energy_user_satisfaction:  100.0
reward:  -27.138468720000002
