In [7]:
import os
import sys
import gym4real
import gymnasium as gym
from gymnasium import spaces
from gym4real.envs.wds.utils import parameter_generator
import wntr
import wntr.sim
from gym4real.envs.wds.reward_scaling_wrapper import RewardScalingWrapper
from dueling_dqn import DQN_Implementation, Double_DQN_Implementation, Dueling_DQN_Implementation, Distributional_DQN_Implementation
from Normalise import NormaliseObservation

In [8]:
# Absolute path to the Anytown world configuration, built from the current
# working directory. The notebook therefore has to run from the directory
# that contains the `gym4real` package folder.
config_path = os.path.join(
    os.getcwd(),
    "gym4real", "envs", "wds",
    "world_anytown.yaml",
)

In [9]:
# Echo the resolved path so a wrong working directory is easy to spot.
config_path

'/Users/kamalrajanisrani/Documents/MSc Advanced Machine Learning/Semester 1/Reinforcement Learning/gym4ReaL/gym4real/envs/wds/world_anytown.yaml'

In [10]:
# Baseline settings for the WDS environment, generated from the world config.
# The numeric values are identical to the plain literals 3600 and 604800:
#   60 * 60          == 3600
#   7 * 24 * 60 * 60 == 604800
# Presumably both are in seconds (an hourly hydraulic step over a one-week
# horizon) -- TODO confirm against parameter_generator.
base_params = parameter_generator(
    world_options=config_path,
    hydraulic_step=60 * 60,
    duration=7 * 24 * 60 * 60,
    seed=42,
)

In [11]:
# Configure the environment for the moving-average (SMA) demand variant.
# The flags are written into `base_params` in place, and make_env() reads
# `base_params` when it is called, so every environment built after this cell
# picks them up. How the environment consumes the two flags is not visible
# here -- presumably they are mutually exclusive; verify in the WDS env code.
base_params['demand_moving_average'] = True  # enable the moving-average (SMA) flag
base_params['demand_exp_moving_average'] = False  # disable the exponential-moving-average (EMA) flag

In [12]:
def make_env():
    """Build the wrapped WDS environment used for training.

    Creates ``gym4real/wds-v0`` with the module-level ``base_params`` as its
    ``settings`` and wraps it first in ``RewardScalingWrapper`` and then in
    ``NormaliseObservation``.

    Returns:
        The environment with ``NormaliseObservation`` as the outermost wrapper.
    """
    # Innermost call runs first: make -> reward scaling -> observation normalisation.
    return NormaliseObservation(
        RewardScalingWrapper(
            gym.make('gym4real/wds-v0', settings=base_params)
        )
    )

In [13]:
# Instantiate the wrapped environment from the current `base_params`; the
# model cells below take it as their `env`.
dqn_sma_env = make_env()

  gym.logger.warn(
  gym.logger.warn(


In [12]:
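# Disabled alternative: build a Dueling DQN agent on the same environment
# (`dqn_sma_env`). Left commented out; uncomment to construct that agent.
# dueling_sma_model = Dueling_DQN_Implementation(
#     env=dqn_sma_env,
#     learning_rate=3e-4,
#     tensorboard_log="./wds_dueling_logs/"
# )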


In [14]:
# Distributional DQN agent on the SMA environment; TensorBoard logs are
# directed to ./wds_dist_logs/.
# `n_atoms`, `v_min` and `v_max` presumably define a categorical (C51-style)
# return support of 51 atoms spanning [-10, 10] -- TODO confirm in dueling_dqn.py.
# NOTE(review): the training log reports episode rewards of roughly 100-165.
# Verify that the (discounted) returns the agent must represent actually fit
# inside [v_min, v_max]; a too-narrow support would clip the learned distribution.
dist_sma_model = Distributional_DQN_Implementation(
    env=dqn_sma_env,
    learning_rate=3e-4,
    tensorboard_log="./wds_dist_logs/",
    n_atoms=51,
    v_min=-10,
    v_max=10
)


In [15]:
# Train for 200k environment steps, then persist the agent. The recorded log
# shows the checkpoint being written as "distributional-dqn-normalisation-sma.zip".
# NOTE(review): the recorded log ends at epsilon ~0.42, so the agent is still
# exploring heavily when training stops -- confirm the schedule is intended.
dist_sma_model.learn(total_timesteps=200_000)
dist_sma_model.save("distributional-dqn-normalisation-sma")


Train for 200000 steps | device=cpu
Resetting the environment...
Step: 1111 | Episode Reward: 153.997 | Epsilon: 1.000
Resetting the environment...
Step: 2282 | Episode Reward: 151.169 | Epsilon: 0.995
Resetting the environment...
Step: 3408 | Episode Reward: 151.317 | Epsilon: 0.990
Resetting the environment...
Step: 4545 | Episode Reward: 153.626 | Epsilon: 0.985
Resetting the environment...
Step: 5693 | Episode Reward: 153.360 | Epsilon: 0.980
Resetting the environment...
Step: 6842 | Episode Reward: 148.326 | Epsilon: 0.975
Resetting the environment...
Step: 8000 | Episode Reward: 152.249 | Epsilon: 0.970
Resetting the environment...
Step: 9242 | Episode Reward: 133.343 | Epsilon: 0.966
Resetting the environment...
Step: 10433 | Episode Reward: 146.349 | Epsilon: 0.961
Resetting the environment...
Step: 11503 | Episode Reward: 132.059 | Epsilon: 0.956
Resetting the environment...
Step: 12634 | Episode Reward: 152.668 | Epsilon: 0.951
Resetting the environment...




Step: 13794 | Episode Reward: 140.755 | Epsilon: 0.946
Resetting the environment...
Step: 14950 | Episode Reward: 155.198 | Epsilon: 0.942
Resetting the environment...




Step: 16071 | Episode Reward: 144.100 | Epsilon: 0.937
Resetting the environment...
Step: 17212 | Episode Reward: 143.889 | Epsilon: 0.932
Resetting the environment...
Step: 18652 | Episode Reward: 109.176 | Epsilon: 0.928
Resetting the environment...
Step: 19772 | Episode Reward: 147.424 | Epsilon: 0.923
Resetting the environment...
Step: 20861 | Episode Reward: 160.615 | Epsilon: 0.918
Resetting the environment...
Step: 21947 | Episode Reward: 160.407 | Epsilon: 0.914
Resetting the environment...
Step: 23080 | Episode Reward: 148.234 | Epsilon: 0.909
Resetting the environment...
Step: 24235 | Episode Reward: 146.665 | Epsilon: 0.905
Resetting the environment...
Step: 25325 | Episode Reward: 160.068 | Epsilon: 0.900
Resetting the environment...
Step: 26565 | Episode Reward: 131.963 | Epsilon: 0.896
Resetting the environment...
Step: 27670 | Episode Reward: 159.023 | Epsilon: 0.891
Resetting the environment...
Step: 28764 | Episode Reward: 151.119 | Epsilon: 0.887
Resetting the environ



Step: 62886 | Episode Reward: 98.508 | Epsilon: 0.771
Resetting the environment...
Step: 64085 | Episode Reward: 162.620 | Epsilon: 0.767
Resetting the environment...
Step: 65178 | Episode Reward: 153.998 | Epsilon: 0.763
Resetting the environment...




Step: 66793 | Episode Reward: 103.972 | Epsilon: 0.759
Resetting the environment...
Step: 67840 | Episode Reward: 161.439 | Epsilon: 0.755
Resetting the environment...
Step: 69390 | Episode Reward: 112.187 | Epsilon: 0.751
Resetting the environment...
Step: 70459 | Episode Reward: 161.083 | Epsilon: 0.748
Resetting the environment...
Step: 71525 | Episode Reward: 151.796 | Epsilon: 0.744
Resetting the environment...
Step: 72633 | Episode Reward: 155.591 | Epsilon: 0.740
Resetting the environment...
Step: 73738 | Episode Reward: 140.940 | Epsilon: 0.737
Resetting the environment...
Step: 74829 | Episode Reward: 143.928 | Epsilon: 0.733
Resetting the environment...




Step: 76395 | Episode Reward: 101.345 | Epsilon: 0.729
Resetting the environment...
Step: 77607 | Episode Reward: 142.561 | Epsilon: 0.726
Resetting the environment...
Step: 78815 | Episode Reward: 141.396 | Epsilon: 0.722
Resetting the environment...
Step: 79853 | Episode Reward: 163.146 | Epsilon: 0.718
Resetting the environment...
Step: 80942 | Episode Reward: 161.397 | Epsilon: 0.715
Resetting the environment...
Step: 82028 | Episode Reward: 146.968 | Epsilon: 0.711
Resetting the environment...
Step: 83128 | Episode Reward: 153.940 | Epsilon: 0.708
Resetting the environment...
Step: 84341 | Episode Reward: 142.633 | Epsilon: 0.704
Resetting the environment...
Step: 85518 | Episode Reward: 160.601 | Epsilon: 0.701
Resetting the environment...
Step: 86619 | Episode Reward: 154.950 | Epsilon: 0.697
Resetting the environment...
Step: 87728 | Episode Reward: 151.248 | Epsilon: 0.694
Resetting the environment...
Step: 88824 | Episode Reward: 156.445 | Epsilon: 0.690
Resetting the environ



Step: 123742 | Episode Reward: 105.305 | Epsilon: 0.591
Resetting the environment...
Step: 124838 | Episode Reward: 153.693 | Epsilon: 0.588
Resetting the environment...
Step: 125927 | Episode Reward: 156.106 | Epsilon: 0.585
Resetting the environment...
Step: 126972 | Episode Reward: 163.293 | Epsilon: 0.582
Resetting the environment...
Step: 128030 | Episode Reward: 158.597 | Epsilon: 0.579
Resetting the environment...
Step: 129064 | Episode Reward: 163.821 | Epsilon: 0.576
Resetting the environment...
Step: 130153 | Episode Reward: 161.117 | Epsilon: 0.573
Resetting the environment...
Step: 131246 | Episode Reward: 154.042 | Epsilon: 0.570
Resetting the environment...
Step: 132334 | Episode Reward: 157.167 | Epsilon: 0.568
Resetting the environment...
Step: 133400 | Episode Reward: 152.942 | Epsilon: 0.565
Resetting the environment...
Step: 134676 | Episode Reward: 144.907 | Epsilon: 0.562
Resetting the environment...
Step: 135752 | Episode Reward: 157.574 | Epsilon: 0.559
Resetting



Step: 136918 | Episode Reward: 143.511 | Epsilon: 0.556
Resetting the environment...
Step: 137993 | Episode Reward: 158.570 | Epsilon: 0.554
Resetting the environment...
Step: 139027 | Episode Reward: 164.542 | Epsilon: 0.551
Resetting the environment...
Step: 140077 | Episode Reward: 161.509 | Epsilon: 0.548
Resetting the environment...
Step: 141478 | Episode Reward: 119.271 | Epsilon: 0.545
Resetting the environment...
Step: 142565 | Episode Reward: 165.614 | Epsilon: 0.543
Resetting the environment...
Step: 143643 | Episode Reward: 150.953 | Epsilon: 0.540
Resetting the environment...
Step: 144710 | Episode Reward: 160.797 | Epsilon: 0.537
Resetting the environment...
Step: 145783 | Episode Reward: 154.920 | Epsilon: 0.534
Resetting the environment...
Step: 146812 | Episode Reward: 166.769 | Epsilon: 0.532
Resetting the environment...
Step: 147836 | Episode Reward: 161.131 | Epsilon: 0.529
Resetting the environment...
Step: 149294 | Episode Reward: 119.741 | Epsilon: 0.526
Resetting



Step: 162276 | Episode Reward: 112.850 | Epsilon: 0.496
Resetting the environment...
Step: 163324 | Episode Reward: 158.721 | Epsilon: 0.493
Resetting the environment...
Step: 164401 | Episode Reward: 162.109 | Epsilon: 0.491
Resetting the environment...
Step: 165435 | Episode Reward: 155.681 | Epsilon: 0.488
Resetting the environment...
Step: 166822 | Episode Reward: 119.148 | Epsilon: 0.486
Resetting the environment...
Step: 167884 | Episode Reward: 153.597 | Epsilon: 0.483
Resetting the environment...
Step: 168911 | Episode Reward: 158.481 | Epsilon: 0.481
Resetting the environment...
Step: 169952 | Episode Reward: 162.755 | Epsilon: 0.479
Resetting the environment...
Step: 171002 | Episode Reward: 157.843 | Epsilon: 0.476
Resetting the environment...
Step: 172050 | Episode Reward: 162.917 | Epsilon: 0.474
Resetting the environment...
Step: 173112 | Episode Reward: 159.995 | Epsilon: 0.471
Resetting the environment...
Step: 174359 | Episode Reward: 144.803 | Epsilon: 0.469
Resetting



Step: 195802 | Episode Reward: 154.002 | Epsilon: 0.424
Resetting the environment...
Step: 196843 | Episode Reward: 156.423 | Epsilon: 0.422
Resetting the environment...
Step: 197891 | Episode Reward: 152.905 | Epsilon: 0.420
Resetting the environment...
Step: 198920 | Episode Reward: 160.201 | Epsilon: 0.418
Resetting the environment...
Training finished
Saving model to distributional-dqn-normalisation-sma.zip
Model saved
