# Training RL agents with ErNESTO-gym

This notebook provides a simple template for training well-known RL agents and saving the resulting models.<br>
Refer to the `testing.ipynb` notebook to test these models and compare them with deterministic strategies.

In [None]:
# Enable the autoreload extension in mode 2, so imported modules are
# reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Change the working directory two levels up; the relative paths used in the
# cells below ("ernestogym/...", "./examples/...") are resolved from there.
# NOTE(review): presumably this lands on the repository root — confirm.
%cd ../..

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime
from tqdm import tqdm
from gymnasium.utils.env_checker import check_env

from ernestogym.envs import MicroGridEnv
from ernestogym.envs.single_agent.utils import parameter_generator

# Global seaborn styling for the figures drawn in this notebook.
sns.set_style('darkgrid')
# Current seaborn color palette (not referenced by the cells visible here).
plot_colors = sns.color_palette()
# Scale fonts by 1.5x.
# NOTE(review): sns.set() also re-applies seaborn's theme defaults — confirm
# it leaves the style selected above as intended.
sns.set(font_scale=1.5)

In [None]:
def plot_rewards(timestamps: list, res: dict, reward_type='weighted_reward', sampling_rate=10):
    """Plot the three reward components stored under ``res[reward_type]``.

    Draws one stacked subplot per component (``r_trad``, ``r_op``,
    ``r_clip``) against ``timestamps``. Both the x values and each series
    are thinned by keeping every ``sampling_rate``-th element.

    Args:
        timestamps: x-axis values; must support step slicing.
        res: mapping from reward type to a mapping holding the ``r_trad``,
            ``r_op`` and ``r_clip`` series.
        reward_type: key of ``res`` selecting which rewards to plot.
        sampling_rate: slice step applied to ``timestamps`` and each series.
    """
    _, axes = plt.subplots(3, 1, figsize=(12, 9), tight_layout=True)

    # Resolve every series up front so a missing key fails before any drawing.
    components = res[reward_type]
    panels = [
        ('R_trad', components['r_trad']),
        ('R_op', components['r_op']),
        ('R_clip', components['r_clip']),
    ]

    for axis, (label, values) in zip(axes, panels):
        axis.plot(timestamps[::sampling_rate], values[::sampling_rate])
        axis.set(xlabel='Samples', ylabel=label)

def plot_cum_rewards(timestamps: list, res: dict, reward_type='weighted_reward', sampling_rate=10):
    """Plot the cumulative sum of each reward component in ``res[reward_type]``.

    Draws one stacked subplot per component (``r_trad``, ``r_op``,
    ``r_clip``) against ``timestamps``. The cumulative sum is computed over
    the full series first; only afterwards are the x values and the summed
    series thinned by keeping every ``sampling_rate``-th element.

    Args:
        timestamps: x-axis values; must support step slicing.
        res: mapping from reward type to a mapping holding the ``r_trad``,
            ``r_op`` and ``r_clip`` series.
        reward_type: key of ``res`` selecting which rewards to plot.
        sampling_rate: slice step applied to ``timestamps`` and each
            cumulative series.
    """
    _, axes = plt.subplots(3, 1, figsize=(12, 9), tight_layout=True)

    # Accumulate every series up front so a missing key fails before drawing.
    components = res[reward_type]
    panels = [
        ('R_trad', np.cumsum(components['r_trad'])),
        ('R_op', np.cumsum(components['r_op'])),
        ('R_clip', np.cumsum(components['r_clip'])),
    ]

    for axis, (label, running_total) in zip(axes, panels):
        axis.plot(timestamps[::sampling_rate], running_total[::sampling_rate])
        axis.set(xlabel='Samples', ylabel=label)

## Environment Information

In [None]:
# YAML configuration files passed to parameter_generator below. The paths are
# relative, so they depend on the working directory set in the first cell.
pack_options = "ernestogym/ernesto/data/battery/pack.yaml"
# Electrical model: the fading Thevenin pack is active; the plain Thevenin
# pack is kept as a commented-out alternative.
# ecm = "ernestogym/ernesto/data/battery/models/electrical/thevenin_pack.yaml"
ecm = "ernestogym/ernesto/data/battery/models/electrical/thevenin_fading_pack.yaml"
r2c = "ernestogym/ernesto/data/battery/models/thermal/r2c_thermal_pack.yaml"
bolun = "ernestogym/ernesto/data/battery/models/aging/bolun_pack.yaml"
# World settings: world_fading.yaml is active; world_deg.yaml is the
# commented-out alternative.
# NOTE(review): presumably the world file must match the chosen electrical
# model (fading vs. non-fading) — confirm before switching only one of them.
# world = "ernestogym/envs/single_agent/world_deg.yaml"
world = "ernestogym/envs/single_agent/world_fading.yaml"

# Build the settings object consumed by MicroGridEnv, with reward
# normalization switched on.
params = parameter_generator(
    battery_options=pack_options,
    electrical_model=ecm,
    thermal_model=r2c,
    aging_model=bolun,
    world_options=world,
    use_reward_normalization=True
    
)

In [None]:
# Create the environment from the generated settings
env = MicroGridEnv(settings=params)

# Sanity check: print the shapes and bounds of the observation/action spaces,
# the keys of env.spaces, and one random sample drawn from each space.
print('Size of State Space: ', env.observation_space.shape)
print('Observation Space: ', env.spaces.keys())
print('Size of Action Space: ', env.action_space.shape)
print('Min action: ', env.action_space.low)
print('Max action: ', env.action_space.high)
print('Sample State: ', env.observation_space.sample())
print('Sample Action: ', env.action_space.sample())

## Experiment settings

In [None]:
# Training budget shared by the agents below: it is used both as
# total_timesteps for learn() and as the checkpoint save frequency.
# Commented-out alternative: use the length of the environment's demand series.
#num_steps = len(env.demand)
num_steps = 100000

## PPO Agent

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.callbacks import CheckpointCallback

In [None]:
# Save a checkpoint every `num_steps` callback steps (not every 1000 steps).
# Training below also runs for `num_steps` timesteps, so this presumably
# produces a single "ppo_*" checkpoint near the end of training — lower
# save_freq if intermediate checkpoints are wanted.
checkpoint_callback = CheckpointCallback(
    save_freq=num_steps,
    save_path="./examples/single_agent/models/",
    name_prefix="ppo",
    )

In [None]:
# PPO agent with the MLP policy on the micro-grid environment;
# gamma=0.9 is the discount factor and verbose=False silences logging output.
model = PPO(MlpPolicy, env, verbose=False, gamma=0.9)

# Train for num_steps timesteps with a progress bar. The timestep counter is
# not reset at the start of learn(), and the checkpoint callback saves the
# model to disk while training runs.
model.learn(total_timesteps=num_steps,
            progress_bar=True, 
            reset_num_timesteps=False,
            callback=[checkpoint_callback],
            )

## A2C Agent

In [None]:
from stable_baselines3 import A2C
# Import the policy alias from the A2C package rather than from PPO, so this
# section is self-consistent and does not depend on the PPO module.
from stable_baselines3.a2c import MlpPolicy
from stable_baselines3.common.callbacks import CheckpointCallback

In [None]:
# Save a checkpoint every `num_steps` callback steps (not every 1000 steps).
# Training below also runs for `num_steps` timesteps, so this presumably
# produces a single "a2c_*" checkpoint near the end of training — lower
# save_freq if intermediate checkpoints are wanted.
checkpoint_callback = CheckpointCallback(
    save_freq=num_steps,
    save_path="./examples/single_agent/models/",
    name_prefix="a2c",
    )

In [None]:
# A2C agent with the MLP policy on the micro-grid environment;
# gamma=0.9 is the discount factor and verbose=False silences logging output.
model = A2C(MlpPolicy, env, verbose=False, gamma=0.9)

# Train for num_steps timesteps with a progress bar. The timestep counter is
# not reset at the start of learn(), and the checkpoint callback saves the
# model to disk while training runs.
model.learn(total_timesteps=num_steps,
            progress_bar=True, 
            reset_num_timesteps=False,
            callback=[checkpoint_callback],
            )