## Script to generate graphs from agents

This script first runs one episode using the ChargeAsFastAsPossible agent and saves the replay; the other agents then use the same trajectory.

### First, declare the env and set the names

In [1]:
import gymnasium as gym
from stable_baselines3 import PPO, TD3, DDPG
from sb3_contrib import TRPO
import pkg_resources
import os

from ev2gym.models.ev2gym_env import EV2Gym
from ev2gym.rl_agent.reward import profit_maximization
from ev2gym.rl_agent.state import arrival_prices_flex
from ev2gym.rl_agent.state import arrival_prices
from ev2gym.baselines.heuristics import ChargeAsFastAsPossible
from ev2gym.utilities.evaluators import evaluate_model

# Resolve the example configuration shipped inside the ev2gym package
config_file = pkg_resources.resource_filename(
    'ev2gym',
    "/example_config_files/testPST.yaml",
)

# Environment for the reference run: state/reward use the flex-aware variants,
# and save_replay=True so the episode can be replayed by the other agents.
# NOTE(review): the replay is presumably written to ./replay/replay_test.pkl
# (derived from overwrite_name='test'), which the later cells load - confirm.
env = EV2Gym(
    config_file,
    overwrite_name='test',
    state_function=arrival_prices_flex,
    reward_function=profit_maximization,
    flex_multiplier=0.1,
    render_mode=False,
    save_plots=True,
    save_replay=True,
    save_mat=True,
)


  import pkg_resources
  config_file = pkg_resources.resource_filename('ev2gym', config_file)


Creating directory: ./results/test


### Generate the trajectory using the first agent (do not run if you don't want a new trajectory)

In [None]:
# Run one full episode on `env` with the heuristic agent so that the
# environment (created with save_replay=True) records the reference trajectory.
agent = ChargeAsFastAsPossible()
while True:
    action = agent.get_action(env)

    # Gymnasium-style step result: (obs, reward, terminated, truncated, info)
    obs, reward, terminated, truncated, info = env.step(action)

    # An episode ends when it terminates OR is truncated; testing only the
    # first flag would spin forever on a truncated episode.
    done = terminated or truncated
    if done:
        # Reset once the episode is over. The return value is discarded on
        # purpose: under the Gymnasium API it is an (obs, info) tuple, so
        # binding it to `obs` would be misleading.
        env.reset()
        break

## Now declare a new env and run the simulations
Here you need to declare the variables that the environment is going to use.

In [2]:
# Root folder of the trained models; the cells below load
# <models_folder>/<ALGORITHM>/best_model.zip from it
models_folder = "./models/flex_00"

# State and reward functions handed to every replay environment below
# (original note: the first one, the state, is the more important choice;
# NOTE(review): presumably it must match what the saved models were trained with - confirm)
our_state = arrival_prices
our_reward = profit_maximization

# Value passed as `flex_multiplier` to every replay environment below
c = 0.0


### DDPG

In [3]:
# Replay environment for the DDPG run: same config file, but the episode is
# loaded from the saved replay and no new replay is written.
env_rep = EV2Gym(
    config_file,
    load_from_replay_path='./replay/replay_test.pkl',
    overwrite_name='DDPG',
    state_function=our_state,
    reward_function=our_reward,
    flex_multiplier=c,
    render_mode=False,
    save_plots=True,
    save_mat=True,
    save_replay=False,
)

Creating directory: ./results/DDPG


In [4]:
# Load the best DDPG checkpoint, attach the replay environment and evaluate it
run_dir = f"{models_folder}/DDPG/"
model = DDPG.load(f"{run_dir}best_model.zip")
model.set_env(env_rep)
# NOTE(review): the second argument is presumably the number of episodes - confirm
evaluate_model(model, 1)

Plotting simulation data at ./results/DDPG/
total_ev_served:  46.0
total_profits:  -221.20196355207077
real_profits (no flexibility):  -221.20196355207077
Up_flexibility (kWh):  2026.177153234585
Down_flexibility (kWh):  955.7677050975296
total_energy_charged:  1032.389263413058
average_user_satisfaction:  0.9421122222222222
energy_user_satisfaction:  -10000000.0
reward:  -250.652946


### PPO

In [5]:
# Replay environment for the PPO run: same config file, but the episode is
# loaded from the saved replay and no new replay is written.
env_rep = EV2Gym(
    config_file,
    load_from_replay_path='./replay/replay_test.pkl',
    overwrite_name='PPO',
    state_function=our_state,
    reward_function=our_reward,
    flex_multiplier=c,
    render_mode=False,
    save_plots=True,
    save_mat=True,
    save_replay=False,
)

Creating directory: ./results/PPO


In [6]:
# Load the best PPO checkpoint, attach the replay environment and evaluate it
run_dir = f"{models_folder}/PPO/"
model = PPO.load(f"{run_dir}best_model.zip")
model.set_env(env_rep)
# NOTE(review): the second argument is presumably the number of episodes - confirm
evaluate_model(model, 1)

Plotting simulation data at ./results/PPO/
total_ev_served:  46.0
total_profits:  -217.62275464702319
real_profits (no flexibility):  -217.62275464702324
Up_flexibility (kWh):  2310.4979199406807
Down_flexibility (kWh):  957.7433898771856
total_energy_charged:  1039.411722706301
average_user_satisfaction:  0.9517911111111113
energy_user_satisfaction:  -10000000.0
reward:  -269.752139


### TRPO

In [7]:
# Replay environment for the TRPO run: same config file, but the episode is
# loaded from the saved replay and no new replay is written.
env_rep = EV2Gym(
    config_file,
    load_from_replay_path='./replay/replay_test.pkl',
    overwrite_name='TRPO',
    state_function=our_state,
    reward_function=our_reward,
    flex_multiplier=c,
    render_mode=False,
    save_plots=True,
    save_mat=True,
    save_replay=False,
)

Creating directory: ./results/TRPO


In [8]:
# Load the best TRPO checkpoint, attach the replay environment and evaluate it
run_dir = f"{models_folder}/TRPO/"
model = TRPO.load(f"{run_dir}best_model.zip")
model.set_env(env_rep)
# NOTE(review): the second argument is presumably the number of episodes - confirm
evaluate_model(model, 1)

Plotting simulation data at ./results/TRPO/
total_ev_served:  46.0
total_profits:  -173.12676447376012
real_profits (no flexibility):  -173.12676447376
Up_flexibility (kWh):  2842.9689485184763
Down_flexibility (kWh):  741.3138986537906
total_energy_charged:  811.9861584854725
average_user_satisfaction:  0.8457115198969695
energy_user_satisfaction:  -10000000.0
reward:  -523.530841


### TD3

In [9]:
# Replay environment for the TD3 run: same config file, but the episode is
# loaded from the saved replay and no new replay is written.
env_rep = EV2Gym(
    config_file,
    load_from_replay_path='./replay/replay_test.pkl',
    overwrite_name='TD3',
    state_function=our_state,
    reward_function=our_reward,
    flex_multiplier=c,
    render_mode=False,
    save_plots=True,
    save_mat=True,
    save_replay=False,
)

Creating directory: ./results/TD3


In [10]:
# Load the best TD3 checkpoint, attach the replay environment and evaluate it
run_dir = f"{models_folder}/TD3/"
model = TD3.load(f"{run_dir}best_model.zip")
model.set_env(env_rep)
# NOTE(review): the second argument is presumably the number of episodes - confirm
evaluate_model(model, 1)

Plotting simulation data at ./results/TD3/
total_ev_served:  46.0
total_profits:  -225.34958733820187
real_profits (no flexibility):  -225.34958733820199
Up_flexibility (kWh):  1660.1115079776457
Down_flexibility (kWh):  998.3555415329049
total_energy_charged:  1068.0242904451716
average_user_satisfaction:  0.9599677777777778
energy_user_satisfaction:  -10000000.0
reward:  -234.923165
