In [1]:
# Import necessary libraries
import os
import gym
from environments.energy_management_env import EnergyManagementEnv
import numpy as np
import matplotlib.pyplot as plt
import pickle # save every new element in pickle

# Experiment configuration shared by the training sweep below
# (the agent classes themselves are imported further down in this cell).
num_envs = 8  # number of environment copies requested from VectorizedEnvWrapper
num_runs = 5  # independent training repetitions per (agent, reward_scale) pair
epochs = 2000  # forwarded to the agent as `epochs`
gamma = 1  # forwarded to the agent as `gamma`; presumably the discount factor (1 = undiscounted) — confirm in agents/
T = 720  # forwarded to the agent as `T`; presumably the rollout length in environment steps — confirm in agents/

# Import environment registration function
from environments.env_registration import register_env

# Constructor settings for EnergyManagementEnv; they are merged into the
# registration kwargs below.
env_params = dict(
    SOC_min=0.2,
    SOC_max=0.8,
    E=1000,
    lambda_val=0.1,
    data_path='data/Data_input.csv',
    initial_SOC=0.5,  # Set to None if not using an initial_SOC
)

# Register the environment under the id later passed to gym.make.
register_env(
    'EnergyManagement-v0',
    'environments.env_registration:environment_creator',
    {'environment_class': EnergyManagementEnv, **env_params},
)

from rl_monitoring_utils.vectorized_env_wrapper import VectorizedEnvWrapper
from policies.categorical_policy import CategoricalPolicy
from learning_utils.value_estimator import ValueEstimator
from agents.a2c import A2C
from agents.a2c_warm_start import A2C_WarmStart
from agents.ppo import PPO
from agents.reinforce import REINFORCE
import gym


# Build the vectorised environment; the number of copies comes from the
# `num_envs` setting defined at the top of the cell rather than a literal.
energy_management = VectorizedEnvWrapper(gym.make("EnergyManagement-v0"), num_envs=num_envs)

def run_experiment(env, policy_class, agent_class, reward_scale, epochs, gamma, T, num_runs):
    """Train `num_runs` fresh agents on `env` and collect each run's rewards.

    Args:
        env: Environment object; its `reward_scale` attribute is overwritten
            at the start of every run.
        policy_class: Callable invoked as `policy_class(env, lr=1e-2)`.
        agent_class: Training entry point invoked as
            `agent_class(env, policy, value_estimator, epochs=..., gamma=..., T=...)`;
            it must return a 2-tuple `(agent, total_rewards)`.
        reward_scale: Value assigned to `env.reward_scale`.
        epochs, gamma, T: Forwarded unchanged to `agent_class`.
        num_runs: Number of independent repetitions.

    Returns:
        A list with one `total_rewards` entry per run, in run order
        (empty when `num_runs` is 0). The trained agents are discarded.
    """
    def _train_once():
        # A new policy and value estimator per run keeps repetitions independent.
        env.reward_scale = reward_scale
        actor = policy_class(env, lr=1e-2)
        critic = ValueEstimator(env, lr=1e-2)
        _, reward_history = agent_class(env, actor, critic, epochs=epochs, gamma=gamma, T=T)
        return reward_history

    return [_train_once() for _ in range(num_runs)]

# Reward scaling factors to sweep; run_experiment writes each one to `env.reward_scale`.
reward_scales = [1, 125, 1250, 12500, 125000]

results = {}
energy_management = VectorizedEnvWrapper(gym.make("EnergyManagement-v0"), num_envs=num_envs)

# Create the output directory before any training starts: open(..., "wb") does
# not create missing parent directories, and failing at that point would throw
# away a completed training run.
os.makedirs("results", exist_ok=True)

# Run experiments for both A2C and PPO with varying reward scales
for reward_scale in reward_scales:
    for agent_class, agent_name in [(A2C, 'A2C'), (PPO, 'PPO')]:
        label = f'{agent_name}_{reward_scale}'
        print(f"Running {label}")
        results[label] = run_experiment(energy_management, CategoricalPolicy, agent_class, reward_scale, epochs, gamma, T, num_runs)
        # Persist each configuration as soon as it finishes so an interrupted
        # sweep still leaves the completed configurations on disk.
        with open(f"results/{label}.pkl", "wb") as f:
            pickle.dump(results[label], f)

# Plot results: for every configuration, draw the mean across runs and shade
# one standard deviation either side of it.
fig, ax = plt.subplots()
for label, data in results.items():
    runs = np.asarray(data)  # axis 0 indexes the repetition
    means, stddev = runs.mean(axis=0), runs.std(axis=0)
    epochs_range = range(len(means))
    ax.plot(epochs_range, means, label=label)
    ax.fill_between(epochs_range, means - stddev, means + stddev, alpha=0.1)
ax.set(
    title='Performance with Different Reward Scales',
    xlabel='Epoch',
    ylabel='Total Reward',
)
ax.legend()
plt.show()


Running A2C_1
Running PPO_1.371561963988785
Running A2C_12581056864208785
Running PPO_12574861964308785
Running A2C_12500677064708784
Running PPO_12505109564028784
1979/2000:-12.397009264328785

In [None]:
# load from pickle and plot the same thing
import pickle
import numpy as np
import matplotlib.pyplot as plt

# Rebuild the `results` mapping from the per-configuration pickle files written
# by the training cell. Labels are derived from the agent names alone, so this
# cell does not need the A2C/PPO classes in scope and does not force the
# training cell to be re-run first.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# this notebook produced itself.
results = {}
reward_scales = [1, 125, 1250, 12500, 125000]
for reward_scale in reward_scales:
    for agent_name in ('A2C', 'PPO'):
        label = f'{agent_name}_{reward_scale}'
        with open(f"results/{label}.pkl", 'rb') as f:
            results[label] = pickle.load(f)