In [1]:
import os
import sys
import gym4real
import gymnasium as gym
from gymnasium import spaces
from gym4real.envs.wds.utils import parameter_generator
from gym4real.envs.wds.reward_scaling_wrapper import RewardScalingWrapper
import wntr
import wntr.sim
from DQN import DQN_Implementation, Double_DQN_Implementation
from gym4real.envs.wds.Normalise import NormaliseObservation

  from pkg_resources import resource_filename


In [2]:
# Locate the Anytown world file. The working-directory layout is tried first
# (the notebook's original behaviour). Only when that file is missing AND a
# copy exists beside the imported gym4real package is the packaged copy used,
# so the cell no longer depends on the directory the notebook was started from.
_world_file_parts = ("envs", "wds", "world_anytown.yaml")
config_path = os.path.join(os.getcwd(), "gym4real", *_world_file_parts)

_gym4real_init = getattr(gym4real, "__file__", None)
if not os.path.isfile(config_path) and _gym4real_init:
    _packaged_path = os.path.join(
        os.path.dirname(os.path.abspath(_gym4real_init)), *_world_file_parts)
    if os.path.isfile(_packaged_path):
        config_path = _packaged_path

# Base settings dictionary used by every environment created in this notebook.
# Later cells flip the demand-averaging flags on this same object.
base_params = parameter_generator(
    hydraulic_step=3600,  # 60 * 60 — presumably seconds (one hour); TODO confirm unit
    duration=604800,      # 7 * 24 * 3600 — presumably seconds (one week); TODO confirm unit
    seed=42,
    world_options=config_path)

In [3]:
# Select the SMA demand signal: switch the plain moving average on and the
# exponential moving average (EMA) off. Environments built by make_env()
# after this cell pick these flags up through base_params.
base_params.update({
    'demand_moving_average': True,
    'demand_exp_moving_average': False,
})

In [4]:
def make_env(settings=None):
    """Create the wrapped ``gym4real/wds-v0`` environment.

    Args:
        settings: Value forwarded to ``gym.make`` as its ``settings`` keyword.
            When omitted (the default), the notebook-level ``base_params``
            object is used as it is at call time, which is what the original
            zero-argument version always did. Passing it explicitly lets a
            caller build an environment without relying on whatever the
            global currently holds.

    Returns:
        The environment wrapped in ``RewardScalingWrapper`` and then in
        ``NormaliseObservation`` (outermost).
    """
    if settings is None:
        # Resolved at call time, not definition time, so flag changes made to
        # base_params in later cells are still picked up by default.
        settings = base_params

    env = gym.make('gym4real/wds-v0', settings=settings)
    env = RewardScalingWrapper(env)
    env = NormaliseObservation(env)

    return env

In [5]:
# DQN run on the environment built while the SMA flags are active:
# build the env, construct the agent, train, then save under "dqn-sma-norm".
dqn_sma_env = make_env()
dqn_sma_model = DQN_Implementation(
    tensorboard_log="./wds_custom_logs/",
    learning_rate=0.0003,
    env=dqn_sma_env,
)
dqn_sma_model.learn(total_timesteps=200_000)
dqn_sma_model.save("dqn-sma-norm")

  gym.logger.warn(
  gym.logger.warn(


Train for 200000 steps
Resetting the environment...
Step: 212 | Episode Reward: 132.248 | Epsilon: 1.000
Resetting the environment...
Step: 427 | Episode Reward: 129.626 | Epsilon: 0.990
Resetting the environment...
Step: 639 | Episode Reward: 124.060 | Epsilon: 0.980
Resetting the environment...
Step: 856 | Episode Reward: 135.224 | Epsilon: 0.970
Resetting the environment...
Step: 1082 | Episode Reward: 128.414 | Epsilon: 0.961
Resetting the environment...
Step: 1304 | Episode Reward: 123.879 | Epsilon: 0.951
Resetting the environment...
Step: 1516 | Episode Reward: 135.536 | Epsilon: 0.941
Resetting the environment...
Step: 1739 | Episode Reward: 124.234 | Epsilon: 0.932
Resetting the environment...
Step: 1959 | Episode Reward: 128.940 | Epsilon: 0.923
Resetting the environment...
Step: 2161 | Episode Reward: 110.416 | Epsilon: 0.914
Resetting the environment...
Step: 2387 | Episode Reward: 127.194 | Epsilon: 0.904
Resetting the environment...
Step: 2595 | Episode Reward: 122.316 | 



Step: 8423 | Episode Reward: 131.004 | Epsilon: 0.689
Resetting the environment...
Step: 8615 | Episode Reward: 139.148 | Epsilon: 0.683
Resetting the environment...
Step: 8816 | Episode Reward: 141.649 | Epsilon: 0.676
Resetting the environment...
Step: 9022 | Episode Reward: 136.751 | Epsilon: 0.669
Resetting the environment...
Step: 9229 | Episode Reward: 131.002 | Epsilon: 0.662
Resetting the environment...
Step: 9497 | Episode Reward: 132.209 | Epsilon: 0.656
Resetting the environment...
Step: 9702 | Episode Reward: 136.375 | Epsilon: 0.649
Resetting the environment...
Step: 9923 | Episode Reward: 134.505 | Epsilon: 0.643
Resetting the environment...
Step: 10124 | Episode Reward: 136.453 | Epsilon: 0.636
Resetting the environment...
Step: 10322 | Episode Reward: 140.027 | Epsilon: 0.630
Resetting the environment...
Step: 10512 | Episode Reward: 144.886 | Epsilon: 0.624
Resetting the environment...
Step: 10724 | Episode Reward: 132.531 | Epsilon: 0.617
Resetting the environment...




Step: 12319 | Episode Reward: 135.045 | Epsilon: 0.575
Resetting the environment...
Step: 12517 | Episode Reward: 140.788 | Epsilon: 0.570
Resetting the environment...
Step: 12767 | Episode Reward: 139.667 | Epsilon: 0.564
Resetting the environment...
Step: 12958 | Episode Reward: 146.400 | Epsilon: 0.558
Resetting the environment...
Step: 13152 | Episode Reward: 136.983 | Epsilon: 0.553
Resetting the environment...
Step: 13349 | Episode Reward: 137.003 | Epsilon: 0.547
Resetting the environment...
Step: 13559 | Episode Reward: 128.793 | Epsilon: 0.542
Resetting the environment...
Step: 13766 | Episode Reward: 127.279 | Epsilon: 0.536
Resetting the environment...
Step: 14018 | Episode Reward: 133.421 | Epsilon: 0.531
Resetting the environment...
Step: 14233 | Episode Reward: 135.847 | Epsilon: 0.526
Resetting the environment...
Step: 14450 | Episode Reward: 134.900 | Epsilon: 0.520
Resetting the environment...
Step: 14638 | Episode Reward: 143.880 | Epsilon: 0.515
Resetting the environ



Step: 22120 | Episode Reward: 145.742 | Epsilon: 0.352
Resetting the environment...
Step: 22344 | Episode Reward: 141.609 | Epsilon: 0.348
Resetting the environment...
Step: 22527 | Episode Reward: 145.431 | Epsilon: 0.345
Resetting the environment...
Step: 22721 | Episode Reward: 139.758 | Epsilon: 0.341
Resetting the environment...
Step: 22906 | Episode Reward: 146.726 | Epsilon: 0.338
Resetting the environment...
Step: 23091 | Episode Reward: 142.546 | Epsilon: 0.334
Resetting the environment...
Step: 23271 | Episode Reward: 148.188 | Epsilon: 0.331
Resetting the environment...
Step: 23466 | Episode Reward: 144.974 | Epsilon: 0.328
Resetting the environment...
Step: 23654 | Episode Reward: 145.041 | Epsilon: 0.324
Resetting the environment...
Step: 23840 | Episode Reward: 141.461 | Epsilon: 0.321
Resetting the environment...
Step: 24031 | Episode Reward: 135.391 | Epsilon: 0.318
Resetting the environment...
Step: 24239 | Episode Reward: 150.539 | Epsilon: 0.315
Resetting the environ

In [6]:
# Double-DQN run on a fresh environment built while the SMA flags are active.
# Note the learning rate here (1e-4) differs from the plain DQN cells (3e-4).
ddqn_sma_env = make_env()
ddqn_sma_model = Double_DQN_Implementation(
    tensorboard_log="./wds_custom_logs/",
    learning_rate=0.0001,
    env=ddqn_sma_env,
)
ddqn_sma_model.learn(total_timesteps=200_000)
ddqn_sma_model.save("ddqn-sma-norm")

Train for 200000 steps
Resetting the environment...


  gym.logger.warn(
  gym.logger.warn(


Step: 215 | Episode Reward: 122.886 | Epsilon: 1.000
Resetting the environment...
Step: 448 | Episode Reward: 120.553 | Epsilon: 0.990
Resetting the environment...
Step: 662 | Episode Reward: 129.333 | Epsilon: 0.980
Resetting the environment...
Step: 881 | Episode Reward: 128.818 | Epsilon: 0.970
Resetting the environment...
Step: 1099 | Episode Reward: 131.565 | Epsilon: 0.961
Resetting the environment...
Step: 1313 | Episode Reward: 125.147 | Epsilon: 0.951
Resetting the environment...
Step: 1535 | Episode Reward: 125.987 | Epsilon: 0.941
Resetting the environment...
Step: 1773 | Episode Reward: 119.007 | Epsilon: 0.932
Resetting the environment...
Step: 1997 | Episode Reward: 128.471 | Epsilon: 0.923
Resetting the environment...
Step: 2193 | Episode Reward: 114.334 | Epsilon: 0.914
Resetting the environment...
Step: 2405 | Episode Reward: 134.092 | Epsilon: 0.904
Resetting the environment...




Step: 2623 | Episode Reward: 126.410 | Epsilon: 0.895
Resetting the environment...
Step: 2852 | Episode Reward: 130.803 | Epsilon: 0.886
Resetting the environment...




Step: 3065 | Episode Reward: 129.391 | Epsilon: 0.878
Resetting the environment...
Step: 3265 | Episode Reward: 117.658 | Epsilon: 0.869
Resetting the environment...
Step: 3507 | Episode Reward: 106.759 | Epsilon: 0.860
Resetting the environment...
Step: 3734 | Episode Reward: 130.811 | Epsilon: 0.851
Resetting the environment...
Step: 3939 | Episode Reward: 136.902 | Epsilon: 0.843
Resetting the environment...
Step: 4145 | Episode Reward: 136.277 | Epsilon: 0.835
Resetting the environment...
Step: 4372 | Episode Reward: 126.746 | Epsilon: 0.826
Resetting the environment...
Step: 4578 | Episode Reward: 126.364 | Epsilon: 0.818
Resetting the environment...
Step: 4802 | Episode Reward: 136.401 | Epsilon: 0.810
Resetting the environment...
Step: 5040 | Episode Reward: 124.409 | Epsilon: 0.802
Resetting the environment...
Step: 5260 | Episode Reward: 137.822 | Epsilon: 0.794
Resetting the environment...
Step: 5464 | Episode Reward: 132.336 | Epsilon: 0.786
Resetting the environment...
Step

In [7]:
# Select the EMA demand signal: switch the plain moving average (SMA) off and
# the exponential moving average on. Environments built by make_env() after
# this cell pick these flags up through base_params.
base_params.update({
    'demand_moving_average': False,
    'demand_exp_moving_average': True,
})

In [8]:
# DQN run on the environment built while the EMA flags are active:
# build the env, construct the agent, train, then save under "dqn-ema-norm".
dqn_ema_env = make_env()
dqn_ema_model = DQN_Implementation(
    tensorboard_log="./wds_custom_logs/",
    learning_rate=0.0003,
    env=dqn_ema_env,
)
dqn_ema_model.learn(total_timesteps=200_000)
dqn_ema_model.save("dqn-ema-norm")

Train for 200000 steps
Resetting the environment...


  gym.logger.warn(
  gym.logger.warn(


Step: 211 | Episode Reward: 132.772 | Epsilon: 1.000
Resetting the environment...
Step: 447 | Episode Reward: 129.863 | Epsilon: 0.990
Resetting the environment...
Step: 666 | Episode Reward: 131.779 | Epsilon: 0.980
Resetting the environment...
Step: 882 | Episode Reward: 130.008 | Epsilon: 0.970
Resetting the environment...
Step: 1107 | Episode Reward: 126.439 | Epsilon: 0.961
Resetting the environment...
Step: 1331 | Episode Reward: 124.156 | Epsilon: 0.951
Resetting the environment...
Step: 1562 | Episode Reward: 129.757 | Epsilon: 0.941
Resetting the environment...
Step: 1782 | Episode Reward: 116.673 | Epsilon: 0.932
Resetting the environment...
Step: 2013 | Episode Reward: 130.500 | Epsilon: 0.923
Resetting the environment...
Step: 2211 | Episode Reward: 120.243 | Epsilon: 0.914
Resetting the environment...
Step: 2420 | Episode Reward: 134.295 | Epsilon: 0.904
Resetting the environment...




Step: 2651 | Episode Reward: 119.553 | Epsilon: 0.895
Resetting the environment...
Step: 2866 | Episode Reward: 135.417 | Epsilon: 0.886
Resetting the environment...




Step: 3064 | Episode Reward: 128.728 | Epsilon: 0.878
Resetting the environment...
Step: 3267 | Episode Reward: 120.844 | Epsilon: 0.869
Resetting the environment...
Step: 3510 | Episode Reward: 101.925 | Epsilon: 0.860
Resetting the environment...
Step: 3730 | Episode Reward: 135.489 | Epsilon: 0.851
Resetting the environment...
Step: 3938 | Episode Reward: 136.083 | Epsilon: 0.843
Resetting the environment...
Step: 4147 | Episode Reward: 140.499 | Epsilon: 0.835
Resetting the environment...
Step: 4363 | Episode Reward: 126.575 | Epsilon: 0.826
Resetting the environment...
Step: 4581 | Episode Reward: 130.976 | Epsilon: 0.818
Resetting the environment...
Step: 4792 | Episode Reward: 135.188 | Epsilon: 0.810
Resetting the environment...
Step: 5025 | Episode Reward: 127.188 | Epsilon: 0.802
Resetting the environment...
Step: 5230 | Episode Reward: 137.863 | Epsilon: 0.794
Resetting the environment...
Step: 5438 | Episode Reward: 130.847 | Epsilon: 0.786
Resetting the environment...
Step



Step: 14002 | Episode Reward: 132.911 | Epsilon: 0.531
Resetting the environment...
Step: 14211 | Episode Reward: 139.813 | Epsilon: 0.526
Resetting the environment...
Step: 14441 | Episode Reward: 135.888 | Epsilon: 0.520
Resetting the environment...
Step: 14630 | Episode Reward: 143.025 | Epsilon: 0.515
Resetting the environment...
Step: 14824 | Episode Reward: 141.727 | Epsilon: 0.510
Resetting the environment...
Step: 15031 | Episode Reward: 131.550 | Epsilon: 0.505
Resetting the environment...
Step: 15228 | Episode Reward: 139.970 | Epsilon: 0.500
Resetting the environment...
Step: 15453 | Episode Reward: 136.049 | Epsilon: 0.495
Resetting the environment...
Step: 15659 | Episode Reward: 143.758 | Epsilon: 0.490
Resetting the environment...
Step: 15853 | Episode Reward: 141.916 | Epsilon: 0.485
Resetting the environment...
Step: 16043 | Episode Reward: 136.268 | Epsilon: 0.480
Resetting the environment...
Step: 16247 | Episode Reward: 138.230 | Epsilon: 0.475
Resetting the environ

In [9]:
# Double-DQN run on a fresh environment built while the EMA flags are active.
# Note the learning rate here (1e-4) differs from the plain DQN cells (3e-4).
ddqn_ema_env = make_env()
ddqn_ema_model = Double_DQN_Implementation(
    tensorboard_log="./wds_custom_logs/",
    learning_rate=0.0001,
    env=ddqn_ema_env,
)
ddqn_ema_model.learn(total_timesteps=200_000)
ddqn_ema_model.save("ddqn-ema-norm")

Train for 200000 steps
Resetting the environment...


  gym.logger.warn(
  gym.logger.warn(


Step: 215 | Episode Reward: 126.857 | Epsilon: 1.000
Resetting the environment...
Step: 446 | Episode Reward: 126.482 | Epsilon: 0.990
Resetting the environment...
Step: 650 | Episode Reward: 130.731 | Epsilon: 0.980
Resetting the environment...
Step: 870 | Episode Reward: 129.962 | Epsilon: 0.970
Resetting the environment...
Step: 1091 | Episode Reward: 126.095 | Epsilon: 0.961
Resetting the environment...
Step: 1311 | Episode Reward: 121.803 | Epsilon: 0.951
Resetting the environment...
Step: 1542 | Episode Reward: 132.226 | Epsilon: 0.941
Resetting the environment...
Step: 1773 | Episode Reward: 126.633 | Epsilon: 0.932
Resetting the environment...
Step: 1985 | Episode Reward: 129.316 | Epsilon: 0.923
Resetting the environment...
Step: 2175 | Episode Reward: 105.944 | Epsilon: 0.914
Resetting the environment...
Step: 2390 | Episode Reward: 130.727 | Epsilon: 0.904
Resetting the environment...
Step: 2614 | Episode Reward: 127.755 | Epsilon: 0.895
Resetting the environment...
Step: 28