#### Create the environment 

In [1]:
# In your_notebook.ipynb
from env_creator import energy_management_env_creator
from energy_management_env import EnergyManagementEnv
import gym

%run register_env.py

# Register the environment
energy_management_env_creator(SOC_min=0.2, SOC_max=0.8, E=1000, lambda_val=10, data_path='Data_input_v2.csv', initial_SOC=0.5)

# Create the environment
env = gym.make('EnergyManagement-v0')

##### Test the environment

In [2]:
import gym

# Smoke test: take 10 random actions and print the observation before each
# step and the action / reward / SOC after it.
# Observation layout used by the prints below: [0]=Demand, [1]=SOC, [2]=Price.
observation = env.reset()
for t in range(10):
    print(f"Step {t + 1}:")

    # Before Action
    print(f"  Before Action: Demand={observation[0]:.2f}, Price={observation[2]:.2f}, SOC={observation[1]:.2f}", end="")

    # Take a random action
    action = env.action_space.sample()
    observation, reward, done, _ = env.step(action)

    # After Action
    print(f"  |  After Action: Action={action:.2f}, Reward={reward:.2e}, New SOC={observation[1]:.2f}")
    print()

    # Stepping an environment after it reported `done` is undefined in Gym;
    # start a fresh episode instead of continuing the finished one.
    if done:
        observation = env.reset()

env.close()

Step 1:
  Before Action: Demand=174.72, Price=23.96, SOC=0.50  |  After Action: Action=0.00, Reward=-1.79e+01, New SOC=0.40

Step 2:
  Before Action: Demand=163.20, Price=22.80, SOC=0.40  |  After Action: Action=2.00, Reward=-6.00e+01, New SOC=0.50

Step 3:
  Before Action: Demand=150.72, Price=20.79, SOC=0.50  |  After Action: Action=0.00, Reward=-1.05e+01, New SOC=0.40

Step 4:
  Before Action: Demand=148.80, Price=19.87, SOC=0.40  |  After Action: Action=1.00, Reward=-2.96e+01, New SOC=0.40

Step 5:
  Before Action: Demand=149.76, Price=19.33, SOC=0.40  |  After Action: Action=0.00, Reward=-9.62e+00, New SOC=0.30

Step 6:
  Before Action: Demand=148.80, Price=18.90, SOC=0.30  |  After Action: Action=1.00, Reward=-2.81e+01, New SOC=0.30

Step 7:
  Before Action: Demand=142.08, Price=17.82, SOC=0.30  |  After Action: Action=0.00, Reward=-7.50e+00, New SOC=0.20

Step 8:
  Before Action: Demand=144.00, Price=19.76, SOC=0.20  |  After Action: Action=2.00, Reward=-4.82e+01, New SOC=0.30



  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


##### PPO

In [4]:
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from energy_management_env import EnergyManagementEnv

# Id under which the custom environment is expected to be registered already.
env_id = 'EnergyManagement-v0'

def make_env():
    """Create one instance of the registered environment."""
    return gym.make(env_id)

# Eight environment copies; DummyVecEnv steps them one after another in this process.
num_envs = 8
vec_env = DummyVecEnv([make_env for _ in range(num_envs)])

# Normalize observations (clipped to +/-10); rewards are left unscaled.
# NOTE(review): the captured training log shows loss / value_loss around 1e15
# with explained_variance 0, which suggests the raw reward scale is too large
# for the value function — consider norm_reward=True or rescaling the reward.
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False, clip_obs=10.0)

model = PPO("MlpPolicy", vec_env, verbose=1)
# total_timesteps is documented as an int; pass one instead of the float 1e6.
model.learn(total_timesteps=1_000_000)
model.save("ppo_energy_management")
# The policy only makes sense together with the running observation statistics
# it was trained with, so persist them next to the model.
vec_env.save("ppo_energy_management_vecnormalize.pkl")

del model  # Remove to demonstrate saving and loading

model = PPO.load("ppo_energy_management")
print("Training Done")




Using cpu device


  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


------------------------------
| time/              |       |
|    fps             | 3786  |
|    iterations      | 1     |
|    time_elapsed    | 4     |
|    total_timesteps | 16384 |
------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 1062      |
|    iterations           | 2         |
|    time_elapsed         | 30        |
|    total_timesteps      | 32768     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.1      |
|    explained_variance   | 0         |
|    learning_rate        | 0.0003    |
|    loss                 | 5.18e+15  |
|    n_updates            | 10        |
|    policy_gradient_loss | -9.08e-08 |
|    value_loss           | 9.67e+15  |
---------------------------------------
---------------------------------------
| time/                

  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


  |  After Action: Action=0.00, Reward=-2.59e+01, New SOC=0.20

Step 184:
  Before Action: Demand=229.44, Price=32.72, SOC=0.20  |  After Action: Action=2.00, Reward=-1.08e+02, New SOC=0.30

Step 185:
  Before Action: Demand=220.80, Price=42.02, SOC=0.30  |  After Action: Action=1.00, Reward=-9.28e+01, New SOC=0.30

Step 186:
  Before Action: Demand=224.64, Price=56.96, SOC=0.30  |  After Action: Action=1.00, Reward=-1.28e+02, New SOC=0.30

Step 187:
  Before Action: Demand=225.60, Price=61.93, SOC=0.30  |  After Action: Action=2.00, Reward=-2.02e+02, New SOC=0.40

Step 188:
  Before Action: Demand=212.16, Price=49.03, SOC=0.40  |  After Action: Action=2.00, Reward=-1.53e+02, New SOC=0.50

Step 189:
  Before Action: Demand=207.36, Price=35.50, SOC=0.50  |  After Action: Action=0.00, Reward=-3.81e+01, New SOC=0.40

Step 190:
  Before Action: Demand=210.24, Price=27.83, SOC=0.40  |  After Action: Action=0.00, Reward=-3.07e+01, New SOC=0.30

Step 191:
  Before Action: Demand=216.00, Price

In [None]:
# Roll out the PPO policy for 1000 steps on a single, un-vectorized environment.
# Relies on `model` and `vec_env` from the PPO training cell being in the kernel,
# and on 'EnergyManagement-v0' already being registered.
env = gym.make('EnergyManagement-v0')

# Run a simulation loop
observation = env.reset()
for t in range(1000):
    # Print information before and after taking action
    print(f"Step {t + 1}:")

    # Before Action
    print(f"  Before Action: Demand={observation[0]:.2f}, Price={observation[2]:.2f}, SOC={observation[1]:.2f}", end="")

    # The policy was trained on VecNormalize'd observations, so the raw
    # observation must go through the same normalization before prediction.
    # normalize_obs() applies the stored statistics without updating them.
    action, _states = model.predict(vec_env.normalize_obs(observation))

    # Perform the action in the environment
    observation, reward, done, _ = env.step(action)

    # After Action
    print(f"  |  After Action: Action={action:.2f}, Reward={reward:.2e}, New SOC={observation[1]:.2f}")
    print()

    # Stepping past the end of an episode is undefined in Gym; start a new one.
    if done:
        observation = env.reset()

env.close()

##### SAC

In [None]:
import gym
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from energy_management_env import EnergyManagementEnv

env_id = 'EnergyManagement-v0'

def make_env():
    """Create one instance of the registered environment."""
    return gym.make(env_id)

# Eight environment copies; DummyVecEnv steps them one after another in this process.
num_envs = 8
vec_env = DummyVecEnv([make_env for _ in range(num_envs)])
# Normalize observations (clipped to +/-10); rewards are left unscaled.
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False, clip_obs=10.0)

# NOTE(review): SAC only supports continuous (Box) action spaces, while the
# random-action test output earlier in the notebook prints actions 0/1/2 —
# confirm the environment's action space before running this cell.
model = SAC("MlpPolicy", vec_env, verbose=1)
# total_timesteps is documented as an int; pass one instead of the float 1e6.
model.learn(total_timesteps=1_000_000)
model.save("sac_energy_management")
# Persist the observation-normalization statistics the policy was trained with,
# so a later evaluation can restore them alongside the model.
vec_env.save("sac_energy_management_vecnormalize.pkl")
print("Training Done")


SyntaxError: invalid decimal literal (3884865547.py, line 2)

In [None]:
import gym
from stable_baselines3 import SAC
from energy_management_env import EnergyManagementEnv

env_id = 'EnergyManagement-v0'

# Roll out the saved SAC policy for 1000 steps on a single environment.
env = gym.make(env_id)
model = SAC.load("sac_energy_management")

# NOTE(review): the SAC training cell wraps its environments in
# VecNormalize(norm_obs=True), but this loop feeds raw observations to the
# policy and the normalization statistics are not restored here, so the
# printed actions may not reflect the trained policy.
observation = env.reset()
for t in range(1000):
    print(f"Step {t + 1}:")
    print(f"  Before Action: Demand={observation[0]:.2f}, Price={observation[2]:.2f}, SOC={observation[1]:.2f}", end="")
    action, _states = model.predict(observation)
    observation, reward, done, _ = env.step(action)
    print(f"  |  After Action: Action={action:.2f}, Reward={reward:.2e}, New SOC={observation[1]:.2f}")
    print()

    # Stepping past the end of an episode is undefined in Gym; start a new one.
    if done:
        observation = env.reset()

env.close()


Step 1:
  Before Action: Demand=632.88, Price=22.77, SOC=0.50  |  After Action: Action=-0.10, Reward=-1.21e+02, New SOC=0.40

Step 2:
  Before Action: Demand=546.84, Price=22.95, SOC=0.40  |  After Action: Action=-0.10, Reward=-1.03e+02, New SOC=0.30

Step 3:
  Before Action: Demand=494.46, Price=20.45, SOC=0.30  |  After Action: Action=-0.10, Reward=-1.00e+04, New SOC=0.20

Step 4:
  Before Action: Demand=541.08, Price=19.80, SOC=0.20  |  After Action: Action=-0.10, Reward=-1.00e+04, New SOC=0.10

Step 5:
  Before Action: Demand=533.70, Price=18.78, SOC=0.10  |  After Action: Action=-0.10, Reward=-1.00e+04, New SOC=-0.00

Step 6:
  Before Action: Demand=519.12, Price=17.63, SOC=-0.00  |  After Action: Action=-0.10, Reward=-1.00e+04, New SOC=-0.10

Step 7:
  Before Action: Demand=618.48, Price=20.81, SOC=-0.10  |  After Action: Action=-0.10, Reward=-1.00e+04, New SOC=-0.20

Step 8:
  Before Action: Demand=725.76, Price=21.61, SOC=-0.20  |  After Action: Action=-0.10, Reward=-1.00e+04, 

##### A2C

In [None]:
import gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from energy_management_env import EnergyManagementEnv

env_id = 'EnergyManagement-v0'

def make_env():
    """Create one instance of the registered environment."""
    return gym.make(env_id)

# Eight environment copies; DummyVecEnv steps them one after another in this process.
num_envs = 8
vec_env = DummyVecEnv([make_env for _ in range(num_envs)])
# Normalize observations (clipped to +/-10); rewards are left unscaled.
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False, clip_obs=10.0)

model = A2C("MlpPolicy", vec_env, verbose=1)
# total_timesteps is documented as an int; pass one instead of the float 1e6.
model.learn(total_timesteps=1_000_000)
model.save("a2c_energy_management")
# Persist the observation-normalization statistics the policy was trained with,
# so a later evaluation can restore them alongside the model.
vec_env.save("a2c_energy_management_vecnormalize.pkl")
print("Training Done")


In [None]:
import gym
from stable_baselines3 import A2C
from energy_management_env import EnergyManagementEnv

env_id = 'EnergyManagement-v0'

# Roll out the saved A2C policy for 1000 steps on a single environment.
env = gym.make(env_id)
model = A2C.load("a2c_energy_management")

# NOTE(review): the A2C training cell wraps its environments in
# VecNormalize(norm_obs=True), but this loop feeds raw observations to the
# policy and the normalization statistics are not restored here, so the
# printed actions may not reflect the trained policy.
observation = env.reset()
for t in range(1000):
    print(f"Step {t + 1}:")
    print(f"  Before Action: Demand={observation[0]:.2f}, Price={observation[2]:.2f}, SOC={observation[1]:.2f}", end="")
    action, _states = model.predict(observation)
    observation, reward, done, _ = env.step(action)
    print(f"  |  After Action: Action={action:.2f}, Reward={reward:.2e}, New SOC={observation[1]:.2f}")
    print()

    # Stepping past the end of an episode is undefined in Gym; start a new one.
    if done:
        observation = env.reset()

env.close()


##### A3C

In [None]:
import gym
# stable_baselines3 does not export an A3C class, so `from stable_baselines3
# import A3C` raises ImportError. A2C is the library's synchronous variant of
# A3C and is used here instead; the output file name is kept unchanged.
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from energy_management_env import EnergyManagementEnv

env_id = 'EnergyManagement-v0'

def make_env():
    """Create one instance of the registered environment."""
    return gym.make(env_id)

# Eight environment copies; DummyVecEnv steps them one after another in this process.
num_envs = 8
vec_env = DummyVecEnv([make_env for _ in range(num_envs)])
# Normalize observations (clipped to +/-10); rewards are left unscaled.
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False, clip_obs=10.0)

model = A2C("MlpPolicy", vec_env, verbose=1)
# total_timesteps is documented as an int; pass one instead of the float 1e6.
model.learn(total_timesteps=1_000_000)
model.save("a3c_energy_management")
# Persist the observation-normalization statistics the policy was trained with,
# so a later evaluation can restore them alongside the model.
vec_env.save("a3c_energy_management_vecnormalize.pkl")
print("Training Done")


  result = entry_point.load(False)


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 5177 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------


In [None]:
import gym
# stable_baselines3 does not export an A3C class, so `from stable_baselines3
# import A3C` raises ImportError. A2C (the library's synchronous variant of
# A3C) is used to load the model saved under the "a3c_energy_management" name.
from stable_baselines3 import A2C
from energy_management_env import EnergyManagementEnv

env_id = 'EnergyManagement-v0'

# Roll out the saved policy for 1000 steps on a single environment.
env = gym.make(env_id)
model = A2C.load("a3c_energy_management")

# NOTE(review): the corresponding training cell wraps its environments in
# VecNormalize(norm_obs=True), but this loop feeds raw observations to the
# policy and the normalization statistics are not restored here, so the
# printed actions may not reflect the trained policy.
observation = env.reset()
for t in range(1000):
    print(f"Step {t + 1}:")
    print(f"  Before Action: Demand={observation[0]:.2f}, Price={observation[2]:.2f}, SOC={observation[1]:.2f}", end="")
    action, _states = model.predict(observation)
    observation, reward, done, _ = env.step(action)
    print(f"  |  After Action: Action={action:.2f}, Reward={reward:.2e}, New SOC={observation[1]:.2f}")
    print()

    # Stepping past the end of an episode is undefined in Gym; start a new one.
    if done:
        observation = env.reset()

env.close()


ImportError: cannot import name 'DummyVecEnv' from 'stable_baselines3.common.envs' (C:\Users\mf36244\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\stable_baselines3\common\envs\__init__.py)

##### TD3 

In [None]:
import gym
from stable_baselines3 import TD3
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from energy_management_env import EnergyManagementEnv

env_id = 'EnergyManagement-v0'

def make_env():
    """Create one instance of the registered environment."""
    return gym.make(env_id)

# Eight environment copies; DummyVecEnv steps them one after another in this process.
num_envs = 8
vec_env = DummyVecEnv([make_env for _ in range(num_envs)])
# Normalize observations (clipped to +/-10); rewards are left unscaled.
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False, clip_obs=10.0)

# NOTE(review): TD3 only supports continuous (Box) action spaces, while the
# random-action test output earlier in the notebook prints actions 0/1/2 —
# confirm the environment's action space before running this cell.
model = TD3("MlpPolicy", vec_env, verbose=1)
# total_timesteps is documented as an int; pass one instead of the float 1e6.
model.learn(total_timesteps=1_000_000)
model.save("td3_energy_management")
# Persist the observation-normalization statistics the policy was trained with,
# so a later evaluation can restore them alongside the model.
vec_env.save("td3_energy_management_vecnormalize.pkl")
print("Training Done")


In [None]:
import gym
from stable_baselines3 import TD3
from energy_management_env import EnergyManagementEnv

env_id = 'EnergyManagement-v0'

# Roll out the saved TD3 policy for 1000 steps on a single environment.
env = gym.make(env_id)
model = TD3.load("td3_energy_management")

# NOTE(review): the TD3 training cell wraps its environments in
# VecNormalize(norm_obs=True), but this loop feeds raw observations to the
# policy and the normalization statistics are not restored here, so the
# printed actions may not reflect the trained policy.
observation = env.reset()
for t in range(1000):
    print(f"Step {t + 1}:")
    print(f"  Before Action: Demand={observation[0]:.2f}, Price={observation[2]:.2f}, SOC={observation[1]:.2f}", end="")
    action, _states = model.predict(observation)
    observation, reward, done, _ = env.step(action)
    print(f"  |  After Action: Action={action:.2f}, Reward={reward:.2e}, New SOC={observation[1]:.2f}")
    print()

    # Stepping past the end of an episode is undefined in Gym; start a new one.
    if done:
        observation = env.reset()

env.close()


In [None]:
# NOTE(review): this cell duplicates the DQN training cell under the
# "DQN" heading below; running both trains twice and overwrites the same
# output file. Keep only one of them.
import gym
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from energy_management_env import EnergyManagementEnv

env_id = 'EnergyManagement-v0'

def make_env():
    """Create one instance of the registered environment."""
    return gym.make(env_id)

# Eight environment copies; DummyVecEnv steps them one after another in this process.
num_envs = 8
vec_env = DummyVecEnv([make_env for _ in range(num_envs)])
# Normalize observations (clipped to +/-10); rewards are left unscaled.
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False, clip_obs=10.0)

model = DQN("MlpPolicy", vec_env, verbose=1)
# total_timesteps is documented as an int; pass one instead of the float 1e6.
model.learn(total_timesteps=1_000_000)
model.save("dqn_energy_management")
# Persist the observation-normalization statistics the policy was trained with,
# so a later evaluation can restore them alongside the model.
vec_env.save("dqn_energy_management_vecnormalize.pkl")
print("Training Done")


##### DQN

In [None]:
import gym
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from energy_management_env import EnergyManagementEnv

env_id = 'EnergyManagement-v0'

def make_env():
    """Create one instance of the registered environment."""
    return gym.make(env_id)

# Eight environment copies; DummyVecEnv steps them one after another in this process.
num_envs = 8
vec_env = DummyVecEnv([make_env for _ in range(num_envs)])
# Normalize observations (clipped to +/-10); rewards are left unscaled.
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False, clip_obs=10.0)

model = DQN("MlpPolicy", vec_env, verbose=1)
# total_timesteps is documented as an int; pass one instead of the float 1e6.
model.learn(total_timesteps=1_000_000)
model.save("dqn_energy_management")
# Persist the observation-normalization statistics the policy was trained with,
# so a later evaluation can restore them alongside the model.
vec_env.save("dqn_energy_management_vecnormalize.pkl")
print("Training Done")
