## Script to generate graphs from agents

This script first runs one episode using ChargeAsFastAsPossible agent and saves the replay, then the other agents use the same trayectory. 

### First, declare the env and set the names

In [20]:
import gymnasium as gym
from stable_baselines3 import PPO, TD3, DDPG
from sb3_contrib import TRPO
import pkg_resources
import os

from ev2gym.models.ev2gym_env import EV2Gym
from ev2gym.rl_agent.reward import profit_maximization
from ev2gym.rl_agent.state import arrival_prices_flex
from ev2gym.baselines.heuristics import ChargeAsFastAsPossible
from ev2gym.utilities.evaluators import evaluate_model

# Select the configuration file
config_file = "/example_config_files/testPST.yaml"
config_file = pkg_resources.resource_filename('ev2gym', config_file)

# Creating the environment
env = EV2Gym(config_file,
             render_mode=False,
             save_plots=True,
             save_replay=True,
             save_mat=True,
             overwrite_name='test_10',
             state_function=arrival_prices_flex,
             reward_function=profit_maximization,
             flex_multiplier=0.0)


  config_file = pkg_resources.resource_filename('ev2gym', config_file)


Creating directory: ./results/test_10


### Generate the trajectory using the first agent (do not run if you dont want new trajectory)

In [21]:
agent = ChargeAsFastAsPossible()
while True:
    action=agent.get_action(env)
    obs, reward, done, a,info = env.step(action)

    if done:
        obs = env.reset()
        break

Saving replay file at ./replay/replay_test_10.pkl
Plotting simulation data at ./results/test_10/


## Now declare a new env and run the simulations
Here you need to declare the variables that the enviroment is going to use

In [2]:
# Folder containing the models
models_folder="./models/flex_10"

# State and reward (first is more important)
our_state=arrival_prices_flex
our_reward=profit_maximization

# Flex multiplier
c=1.0


### DDPG

In [3]:
env_rep = EV2Gym(config_file,
             load_from_replay_path='./replay/replay_test.pkl',
             render_mode=False,
             save_plots=True,
             save_replay=False,
             save_mat=True,
             overwrite_name='DDPG',
             state_function=our_state,
             reward_function=our_reward,
             flex_multiplier=c)

Creating directory: ./results/DDPG


In [4]:
run_dir = models_folder+"/DDPG/"
model = DDPG.load(run_dir+"best_model.zip")
model.set_env(env_rep)
evaluate_model(model,1)

Plotting simulation data at ./results/DDPG/
total_ev_served:  46.0
total_profits:  600.780355072643
real_profits (no flexibility):  -159.1318543431072
Up_flexibility (kWh):  2882.5962093076264
Down_flexibility (kWh):  709.1229093318032
total_energy_charged:  774.3039202195303
average_user_satisfaction:  0.8313675484899833
energy_user_satisfaction:  -10000000.0
reward:  153.495675


### PPO

In [5]:
env_rep = EV2Gym(config_file,
             load_from_replay_path='./replay/replay_test.pkl',
             render_mode=False,
             save_plots=True,
             save_replay=False,
             save_mat=True,
             overwrite_name='PPO',
             state_function=our_state,
             reward_function=our_reward,
             flex_multiplier=c)

Creating directory: ./results/PPO


In [6]:
run_dir=models_folder+"/PPO/"
model = PPO.load(run_dir+"best_model.zip")
model.set_env(env_rep)
evaluate_model(model,1)

Plotting simulation data at ./results/PPO/
total_ev_served:  46.0
total_profits:  700.7278578807246
real_profits (no flexibility):  -169.2890960186345
Up_flexibility (kWh):  3388.3409879104165
Down_flexibility (kWh):  749.9440835654484
total_energy_charged:  802.4016356680952
average_user_satisfaction:  0.8545866666666666
energy_user_satisfaction:  -10000000.0
reward:  430.121394


### TRPO

In [7]:
env_rep = EV2Gym(config_file,
             load_from_replay_path='./replay/replay_test.pkl',
             render_mode=False,
             save_plots=True,
             save_replay=False,
             save_mat=True,
             overwrite_name='TRPO',
             state_function=our_state,
             reward_function=our_reward,
             flex_multiplier=c)

Creating directory: ./results/TRPO


In [8]:
run_dir=models_folder+"/TRPO/"
model = TRPO.load(run_dir+"best_model.zip")
model.set_env(env_rep)
evaluate_model(model,1)

Plotting simulation data at ./results/TRPO/
total_ev_served:  46.0
total_profits:  708.0342373897674
real_profits (no flexibility):  -166.63559156659625
Up_flexibility (kWh):  3418.456726885701
Down_flexibility (kWh):  734.7008875244887
total_energy_charged:  810.2599503495453
average_user_satisfaction:  0.855439866900315
energy_user_satisfaction:  -10000000.0
reward:  416.240366


### TD3

In [9]:
env_rep = EV2Gym(config_file,
             load_from_replay_path='./replay/replay_test.pkl',
             render_mode=False,
             save_plots=True,
             save_replay=False,
             save_mat=True,
             overwrite_name='TD3',
             state_function=our_state,
             reward_function=our_reward,
             flex_multiplier=c)

Creating directory: ./results/TD3


In [10]:
run_dir=models_folder+"/TD3/"
model = TD3.load(run_dir+"best_model.zip")
model.set_env(env_rep)
evaluate_model(model,1)

Plotting simulation data at ./results/TD3/
total_ev_served:  46.0
total_profits:  501.59953410883344
real_profits (no flexibility):  -210.11872046723994
Up_flexibility (kWh):  2456.3134385147378
Down_flexibility (kWh):  908.5994003762391
total_energy_charged:  1017.5807027320333
average_user_satisfaction:  0.9328855555555556
energy_user_satisfaction:  -10000000.0
reward:  456.121154
