In [1]:
import os
import sys
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Jupyter Magic to make plots show up inline
%matplotlib inline

import gymnasium as gym
import gym4real
from gymnasium import spaces
from gym4real.envs.wds.utils import parameter_generator
from stable_baselines3 import DQN, A2C
from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor, VecNormalize
from gymnasium.wrappers import TimeLimit

import wntr
import wntr.sim

  from pkg_resources import resource_filename


In [2]:
# Location of the Anytown world definition, resolved against the current
# working directory (the notebook must be launched from the repository root).
config_path = os.path.join(
    os.getcwd(), "gym4real", "envs", "wds", "world_anytown.yaml"
)

# Base environment settings shared by every experiment below.
# hydraulic_step / duration are presumably in seconds (1 hour step, 7 day horizon).
base_params = parameter_generator(
    world_options=config_path,
    seed=42,
    duration=7 * 24 * 3600,
    hydraulic_step=3600,
)

# Demand smoothing flags: simple moving average off, exponential moving average on.
base_params['demand_moving_average'] = False
base_params['demand_exp_moving_average'] = True

In [3]:
def make_env_tl_wrapper(max_episode_steps: int = 250):
    """Build the WDS environment wrapped in a step-count ``TimeLimit``.

    Args:
        max_episode_steps: Maximum number of steps an episode may run before
            ``TimeLimit`` truncates it. Defaults to 250, which leaves headroom
            for the 168-hour (one week) simulation configured in ``base_params``.

    Returns:
        The ``gym4real/wds-v0`` environment, created from the module-level
        ``base_params`` settings and wrapped in ``TimeLimit``.
    """
    env = gym.make('gym4real/wds-v0', settings=base_params)
    # Truncate the episode if the simulation has not finished within the step budget.
    return TimeLimit(env, max_episode_steps=max_episode_steps)

In [11]:
# CREATE TRAINING ENVIRONMENT
# One TimeLimit-wrapped environment; VecMonitor records the per-episode
# length/reward that SB3 reports as ep_len_mean / ep_rew_mean.
train_env = DummyVecEnv([make_env_tl_wrapper])
train_env = VecMonitor(train_env)
MODEL_NAME = "DQN_WDSEnv_TL_Wrapper"

print("🧠 Initializing DQN Agent...")
model = DQN(
    "MlpPolicy",
    train_env,
    seed=42,  # fixed seed (same value given to parameter_generator) so the run is reproducible
    device='cpu',
    verbose=1,
    tensorboard_log="./wds_logs/")

print(f"Training {MODEL_NAME}...")
model.learn(total_timesteps=150000, tb_log_name="dqn_wds_run_tl_wrapper")
print("✅ Training Complete.")

# Save the model
model.save(MODEL_NAME)

🧠 Initializing DQN Agent...
Using cpu device
Training DQN_WDSEnv_TL_Wrapper...
Resetting the environment...
Logging to ./wds_logs/dqn_wds_run_tl_wrapper_1


  gym.logger.warn(
  gym.logger.warn(


Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 224      |
|    ep_rew_mean      | 172      |
|    exploration_rate | 0.943    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 34       |
|    time_elapsed     | 26       |
|    total_timesteps  | 896      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0285   |
|    n_updates        | 198      |
----------------------------------
Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 220      |
|    ep_rew_mean      | 163      |
|    exploration_rate | 0.889    |
| time/               |          |
|    episodes         | 8        



Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 222      |
|    ep_rew_mean      | 157      |
|    exploration_rate | 0.55     |
| time/               |          |
|    episodes         | 32       |
|    fps              | 34       |
|    time_elapsed     | 207      |
|    total_timesteps  | 7110     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0161   |
|    n_updates        | 1752     |
----------------------------------
Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 223      |
|    ep_rew_mean      | 158      |
|    exploration_rate | 0.492    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 34       |
|    time_elapsed     | 233      |
|    total_time



Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 212      |
|    ep_rew_mean      | 144      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 72       |
|    fps              | 34       |
|    time_elapsed     | 443      |
|    total_timesteps  | 15235    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0164   |
|    n_updates        | 3783     |
----------------------------------
Resetting the environment...
Resetting the environment...
Resetting the environment...




Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 210      |
|    ep_rew_mean      | 143      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 76       |
|    fps              | 34       |
|    time_elapsed     | 465      |
|    total_timesteps  | 15989    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0263   |
|    n_updates        | 3972     |
----------------------------------
Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 209      |
|    ep_rew_mean      | 141      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 80       |
|    fps              | 34       |
|    time_elapsed     | 486      |
|    total_time



Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 203      |
|    ep_rew_mean      | 135      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 108      |
|    fps              | 34       |
|    time_elapsed     | 640      |
|    total_timesteps  | 22013    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0399   |
|    n_updates        | 5478     |
----------------------------------
Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 201      |
|    ep_rew_mean      | 134      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 112      



Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 194      |
|    ep_rew_mean      | 137      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 184      |
|    fps              | 34       |
|    time_elapsed     | 1072     |
|    total_timesteps  | 36834    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0228   |
|    n_updates        | 9183     |
----------------------------------
Resetting the environment...




Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 194      |
|    ep_rew_mean      | 139      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 188      |
|    fps              | 34       |
|    time_elapsed     | 1095     |
|    total_timesteps  | 37615    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0286   |
|    n_updates        | 9378     |
----------------------------------
Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 194      |
|    ep_rew_mean      | 139      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 192      |
|    fps              | 34 

In [5]:
# CREATE TRAINING ENVIRONMENT
# One TimeLimit-wrapped environment; VecMonitor records the per-episode
# length/reward that SB3 reports as ep_len_mean / ep_rew_mean.
train_env = DummyVecEnv([make_env_tl_wrapper])
train_env = VecMonitor(train_env)
MODEL_NAME = "A2C_WDSEnv_TL_Wrapper"

print("🧠 Initializing A2C Agent...")
model = A2C(
    "MlpPolicy",
    train_env,
    learning_rate=1e-4,
    seed=42,  # fixed seed (same value given to parameter_generator) so the run is reproducible
    device="cpu",
    verbose=1,
    tensorboard_log="./wds_logs/")

print(f"Training {MODEL_NAME}...")
model.learn(total_timesteps=150000, tb_log_name="a2c_wds_run_tl_wrapper") # Increase this for better results
print("✅ Training Complete.")

# Save the model
model.save(MODEL_NAME)

🧠 Initializing A2C Agent...
Using cpu device
Training A2C_WDSEnv_TL_Wrapper...
Resetting the environment...
Logging to ./wds_logs/a2c_wds_run_tl_wrapper_1


  gym.logger.warn(
  gym.logger.warn(


Resetting the environment...
Resetting the environment...
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 226      |
|    ep_rew_mean        | 161      |
| time/                 |          |
|    fps                | 34       |
|    iterations         | 100      |
|    time_elapsed       | 14       |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.38    |
|    explained_variance | 0.0103   |
|    learning_rate      | 0.0001   |
|    n_updates          | 99       |
|    policy_loss        | 3.38     |
|    value_loss         | 6.93     |
------------------------------------
Resetting the environment...
Resetting the environment...
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 222      |
|    ep_rew_mean        | 153      |
| time/                 |          |
|    fps                | 34       |
|    iterations         | 200    



Resetting the environment...
Resetting the environment...
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 226       |
|    ep_rew_mean        | 155       |
| time/                 |           |
|    fps                | 34        |
|    iterations         | 700       |
|    time_elapsed       | 102       |
|    total_timesteps    | 3500      |
| train/                |           |
|    entropy_loss       | -1.38     |
|    explained_variance | -0.000972 |
|    learning_rate      | 0.0001    |
|    n_updates          | 699       |
|    policy_loss        | 1.66      |
|    value_loss         | 1.34      |
-------------------------------------
Resetting the environment...
Resetting the environment...
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 225      |
|    ep_rew_mean        | 154      |
| time/                 |          |
|    fps                | 34       |
|    iterations 



Resetting the environment...
Resetting the environment...
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 221      |
|    ep_rew_mean        | 168      |
| time/                 |          |
|    fps                | 33       |
|    iterations         | 3900     |
|    time_elapsed       | 573      |
|    total_timesteps    | 19500    |
| train/                |          |
|    entropy_loss       | -1.27    |
|    explained_variance | 1.19e-07 |
|    learning_rate      | 0.0001   |
|    n_updates          | 3899     |
|    policy_loss        | 1.88     |
|    value_loss         | 3.59     |
------------------------------------
Resetting the environment...
Resetting the environment...
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 220      |
|    ep_rew_mean        | 168      |
| time/                 |          |
|    fps                | 34       |
|    iterations         | 4000   

In [3]:
def make_env_normalised():
    """Create the bare WDS environment used by the normalisation experiments.

    No ``TimeLimit`` (or any other) wrapper is applied here; the caller wraps
    the resulting vectorised environment in ``VecNormalize``.

    Returns:
        The ``gym4real/wds-v0`` environment created from the module-level
        ``base_params`` settings.
    """
    return gym.make('gym4real/wds-v0', settings=base_params)

In [4]:
# Training environment: VecMonitor sits inside VecNormalize, so the logged
# episode rewards are the raw (un-normalised) ones while the agent trains on
# normalised observations and rewards.
train_env = DummyVecEnv([make_env_normalised])
train_env = VecMonitor(train_env)
train_env = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10.)

MODEL_NAME = "DQN_WDSEnv_Normalise"

print("🧠 Initializing DQN Agent...")
model = DQN(
    "MlpPolicy",
    train_env,
    device='cpu',
    verbose=1,
    tensorboard_log="./wds_logs/")

print(f"Training {MODEL_NAME}...")
model.learn(total_timesteps=150000, tb_log_name="dqn_wds_run_normalise")
print("✅ Training Complete.")

# Save the model
model.save(MODEL_NAME)
# model.save() does not store the VecNormalize running statistics; persist them
# as well so the policy sees identically scaled observations when reloaded.
train_env.save(f"{MODEL_NAME}_vecnormalize.pkl")

  gym.logger.warn(
  gym.logger.warn(


🧠 Initializing DQN Agent...
Using cpu device
Training DQN_WDSEnv_Normalise...
Resetting the environment...
Logging to ./wds_logs/dqn_wds_run_normalise_1
Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 228      |
|    ep_rew_mean      | 159      |
|    exploration_rate | 0.942    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 34       |
|    time_elapsed     | 26       |
|    total_timesteps  | 912      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00373  |
|    n_updates        | 202      |
----------------------------------
Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean 



Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 227      |
|    ep_rew_mean      | 156      |
|    exploration_rate | 0.77     |
| time/               |          |
|    episodes         | 16       |
|    fps              | 34       |
|    time_elapsed     | 106      |
|    total_timesteps  | 3633     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000436 |
|    n_updates        | 883      |
----------------------------------
Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 225      |
|    ep_rew_mean      | 160      |
|    exploration_rate | 0.715    |
| time/               |          |
|    episodes         | 20       |
|    fps              | 34       |
|    time_elapsed     | 132      |
|    total_time



Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 226      |
|    ep_rew_mean      | 164      |
|    exploration_rate | 0.542    |
| time/               |          |
|    episodes         | 32       |
|    fps              | 33       |
|    time_elapsed     | 212      |
|    total_timesteps  | 7232     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000181 |
|    n_updates        | 1782     |
----------------------------------
Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 226      |
|    ep_rew_mean      | 165      |
|    exploration_rate | 0.484    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 33       |
|    time_elapsed     | 240      |
|    total_time



Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 218      |
|    ep_rew_mean      | 173      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 84       |
|    fps              | 33       |
|    time_elapsed     | 541      |
|    total_timesteps  | 18334    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.12e-05 |
|    n_updates        | 4558     |
----------------------------------
Resetting the environment...
Resetting the environment...
Resetting the environment...
Resetting the environment...
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 218      |
|    ep_rew_mean      | 173      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 88       |
|    fps              | 33 

In [5]:
# Training environment: VecMonitor sits inside VecNormalize, so the logged
# episode rewards are the raw (un-normalised) ones while the agent trains on
# normalised observations and rewards.
train_env = DummyVecEnv([make_env_normalised])
train_env = VecMonitor(train_env)
train_env = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10.)

MODEL_NAME = "A2C_WDSEnv_Normalise"

# --- 4. TRAIN ---
print("🧠 Initializing A2C Agent...")
model = A2C(
    "MlpPolicy",
    train_env,
    learning_rate=1e-4,
    device="cpu",
    verbose=1,
    tensorboard_log="./wds_logs/"
)

print(f"Training {MODEL_NAME}...")
model.learn(total_timesteps=200000, tb_log_name="a2c_wds_run_normalise")
print("✅ Training Complete.")

model.save(MODEL_NAME)
# model.save() does not store the VecNormalize running statistics; persist them
# as well so the policy sees identically scaled observations when reloaded.
train_env.save(f"{MODEL_NAME}_vecnormalize.pkl")

🧠 Initializing A2C Agent...
Using cpu device
Training A2C_WDSEnv_Normalise...
Resetting the environment...
Logging to ./wds_logs/a2c_wds_run_normalise_1


  gym.logger.warn(
  gym.logger.warn(


Resetting the environment...




Resetting the environment...
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 221      |
|    ep_rew_mean        | 154      |
| time/                 |          |
|    fps                | 34       |
|    iterations         | 100      |
|    time_elapsed       | 14       |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | -2.94    |
|    learning_rate      | 0.0001   |
|    n_updates          | 99       |
|    policy_loss        | 0.293    |
|    value_loss         | 0.0575   |
------------------------------------
Resetting the environment...
Resetting the environment...
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 219      |
|    ep_rew_mean        | 160      |
| time/                 |          |
|    fps                | 34       |
|    iterations         | 200      |
|    time_elapsed       |



Resetting the environment...
Resetting the environment...
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 226      |
|    ep_rew_mean        | 155      |
| time/                 |          |
|    fps                | 34       |
|    iterations         | 800      |
|    time_elapsed       | 116      |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -1.38    |
|    explained_variance | -7.04    |
|    learning_rate      | 0.0001   |
|    n_updates          | 799      |
|    policy_loss        | 0.512    |
|    value_loss         | 0.163    |
------------------------------------
Resetting the environment...
Resetting the environment...
Resetting the environment...
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 224      |
|    ep_rew_mean        | 154      |
| time/                 |          |
|    fps                | 34       |
|   