In [1]:
import os
import sys
import gym4real
import gymnasium as gym
from gymnasium import spaces
from gym4real.envs.wds.utils import parameter_generator
from gym4real.envs.wds.reward_scaling_wrapper import RewardScalingWrapper
import wntr
import wntr.sim
from DQN import DQN_Implementation, Double_DQN_Implementation
from gym4real.envs.wds.Normalise import NormaliseObservation

  from pkg_resources import resource_filename


In [2]:
# Path to the Anytown world definition, resolved against the current working
# directory (the notebook must therefore be launched from the repository root).
_world_yaml_parts = ("gym4real", "envs", "wds", "world_anytown.yaml")
config_path = os.path.join(os.getcwd(), *_world_yaml_parts)

# Baseline settings that every environment in this notebook is built from.
# NOTE(review): 3600 and 604800 are presumably seconds (1 hour / 7 days) -- confirm
# against parameter_generator.
base_params = parameter_generator(
    world_options=config_path,
    seed=42,
    hydraulic_step=3600,
    duration=604800,
)

In [3]:
# Select the simple-moving-average (SMA) demand option and make sure the
# exponential-moving-average (EMA) option is disabled.
base_params.update(
    demand_moving_average=True,       # SMA on
    demand_exp_moving_average=False,  # EMA off
)

In [4]:
def make_env(settings=None):
    """Build the wrapped ``gym4real/wds-v0`` environment.

    Args:
        settings: Optional settings object forwarded to ``gym.make`` through
            its ``settings`` keyword. When omitted, the notebook-level
            ``base_params`` is used, so existing ``make_env()`` calls behave
            as before.

    Returns:
        The environment wrapped first in ``RewardScalingWrapper`` and then in
        ``NormaliseObservation`` (the outermost wrapper).
    """
    import copy  # local import keeps this cell self-contained

    if settings is None:
        settings = base_params

    # Give each environment its own shallow copy of the settings. Later cells
    # flip the top-level demand_* flags on base_params in place; an environment
    # that kept a reference to the shared dict could otherwise observe those
    # edits after it was built.
    env_settings = copy.copy(settings)

    env = gym.make('gym4real/wds-v0', settings=env_settings)
    env = RewardScalingWrapper(env)
    env = NormaliseObservation(env)

    return env

In [5]:
# Train a Double DQN agent on an environment built from the current settings,
# then save the trained model.
# NOTE(review): with epsilon_decay_flag=False the training log below prints
# "Epsilon: 1.000" at every reported step -- confirm a constant epsilon is the
# intended setup for this run.
ddqn_sma_env = make_env()

ddqn_sma_hparams = dict(
    learning_rate=3e-4,
    epsilon_decay_flag=False,
    tensorboard_log="./wds_custom_logs/",
)
ddqn_sma_model = Double_DQN_Implementation(env=ddqn_sma_env, **ddqn_sma_hparams)

ddqn_sma_model.learn(total_timesteps=200_000)
ddqn_sma_model.save("ddqn-sma-no-epsilon-decay")

  gym.logger.warn(
  gym.logger.warn(


Train for 200000 steps
Resetting the environment...
Step: 191 | Episode Reward: 138.400 | Epsilon: 1.000
Resetting the environment...
Step: 405 | Episode Reward: 136.426 | Epsilon: 1.000
Resetting the environment...
Step: 595 | Episode Reward: 144.094 | Epsilon: 1.000
Resetting the environment...
Step: 797 | Episode Reward: 137.155 | Epsilon: 1.000
Resetting the environment...
Step: 1002 | Episode Reward: 139.300 | Epsilon: 1.000
Resetting the environment...
Step: 1205 | Episode Reward: 135.185 | Epsilon: 1.000
Resetting the environment...
Step: 1403 | Episode Reward: 142.329 | Epsilon: 1.000
Resetting the environment...
Step: 1595 | Episode Reward: 145.862 | Epsilon: 1.000
Resetting the environment...
Step: 1806 | Episode Reward: 136.360 | Epsilon: 1.000
Resetting the environment...
Step: 1999 | Episode Reward: 122.239 | Epsilon: 1.000
Resetting the environment...
Step: 2200 | Episode Reward: 138.053 | Epsilon: 1.000
Resetting the environment...
Step: 2393 | Episode Reward: 135.362 | 



Step: 28616 | Episode Reward: 120.822 | Epsilon: 1.000
Resetting the environment...
Step: 28815 | Episode Reward: 138.817 | Epsilon: 1.000
Resetting the environment...
Step: 29013 | Episode Reward: 140.071 | Epsilon: 1.000
Resetting the environment...
Step: 29214 | Episode Reward: 137.798 | Epsilon: 1.000
Resetting the environment...
Step: 29457 | Episode Reward: 141.129 | Epsilon: 1.000
Resetting the environment...
Step: 29661 | Episode Reward: 132.127 | Epsilon: 1.000
Resetting the environment...
Step: 29851 | Episode Reward: 139.865 | Epsilon: 1.000
Resetting the environment...
Step: 30033 | Episode Reward: 143.988 | Epsilon: 1.000
Resetting the environment...
Step: 30234 | Episode Reward: 137.920 | Epsilon: 1.000
Resetting the environment...
Step: 30431 | Episode Reward: 144.293 | Epsilon: 1.000
Resetting the environment...
Step: 30626 | Episode Reward: 139.467 | Epsilon: 1.000
Resetting the environment...
Step: 30868 | Episode Reward: 140.698 | Epsilon: 1.000
Resetting the environ



Step: 37146 | Episode Reward: 141.331 | Epsilon: 1.000
Resetting the environment...
Step: 37334 | Episode Reward: 115.489 | Epsilon: 1.000
Resetting the environment...
Step: 37545 | Episode Reward: 146.964 | Epsilon: 1.000
Resetting the environment...
Step: 37744 | Episode Reward: 138.517 | Epsilon: 1.000
Resetting the environment...
Step: 37948 | Episode Reward: 142.136 | Epsilon: 1.000
Resetting the environment...
Step: 38144 | Episode Reward: 141.028 | Epsilon: 1.000
Resetting the environment...
Step: 38332 | Episode Reward: 137.914 | Epsilon: 1.000
Resetting the environment...
Step: 38534 | Episode Reward: 140.400 | Epsilon: 1.000
Resetting the environment...
Step: 38735 | Episode Reward: 138.183 | Epsilon: 1.000
Resetting the environment...
Step: 38935 | Episode Reward: 136.123 | Epsilon: 1.000
Resetting the environment...
Step: 39127 | Episode Reward: 141.920 | Epsilon: 1.000
Resetting the environment...
Step: 39318 | Episode Reward: 122.524 | Epsilon: 1.000
Resetting the environ



Step: 40960 | Episode Reward: 131.325 | Epsilon: 1.000
Resetting the environment...
Step: 41159 | Episode Reward: 143.337 | Epsilon: 1.000
Resetting the environment...
Step: 41358 | Episode Reward: 144.005 | Epsilon: 1.000
Resetting the environment...
Step: 41543 | Episode Reward: 124.730 | Epsilon: 1.000
Resetting the environment...
Step: 41738 | Episode Reward: 136.828 | Epsilon: 1.000
Resetting the environment...
Step: 41948 | Episode Reward: 136.585 | Epsilon: 1.000
Resetting the environment...
Step: 42194 | Episode Reward: 139.402 | Epsilon: 1.000
Resetting the environment...
Step: 42386 | Episode Reward: 144.362 | Epsilon: 1.000
Resetting the environment...
Step: 42584 | Episode Reward: 147.530 | Epsilon: 1.000
Resetting the environment...
Step: 42792 | Episode Reward: 141.378 | Epsilon: 1.000
Resetting the environment...
Step: 42991 | Episode Reward: 142.475 | Epsilon: 1.000
Resetting the environment...
Step: 43187 | Episode Reward: 142.097 | Epsilon: 1.000
Resetting the environ



Step: 58862 | Episode Reward: 133.801 | Epsilon: 1.000
Resetting the environment...
Step: 59059 | Episode Reward: 141.448 | Epsilon: 1.000
Resetting the environment...
Step: 59256 | Episode Reward: 135.557 | Epsilon: 1.000
Resetting the environment...
Step: 59478 | Episode Reward: 140.002 | Epsilon: 1.000
Resetting the environment...
Step: 59707 | Episode Reward: 142.744 | Epsilon: 1.000
Resetting the environment...
Step: 59918 | Episode Reward: 146.343 | Epsilon: 1.000
Resetting the environment...
Step: 60115 | Episode Reward: 143.830 | Epsilon: 1.000
Resetting the environment...
Step: 60355 | Episode Reward: 142.482 | Epsilon: 1.000
Resetting the environment...
Step: 60542 | Episode Reward: 145.373 | Epsilon: 1.000
Resetting the environment...
Step: 60743 | Episode Reward: 139.283 | Epsilon: 1.000
Resetting the environment...
Step: 60983 | Episode Reward: 144.049 | Epsilon: 1.000
Resetting the environment...
Step: 61171 | Episode Reward: 143.228 | Epsilon: 1.000
Resetting the environ



Step: 73866 | Episode Reward: 123.177 | Epsilon: 1.000
Resetting the environment...
Step: 74061 | Episode Reward: 135.615 | Epsilon: 1.000
Resetting the environment...
Step: 74257 | Episode Reward: 145.723 | Epsilon: 1.000
Resetting the environment...
Step: 74450 | Episode Reward: 147.037 | Epsilon: 1.000
Resetting the environment...
Step: 74654 | Episode Reward: 135.013 | Epsilon: 1.000
Resetting the environment...
Step: 74861 | Episode Reward: 135.921 | Epsilon: 1.000
Resetting the environment...
Step: 75051 | Episode Reward: 135.427 | Epsilon: 1.000
Resetting the environment...
Step: 75237 | Episode Reward: 148.033 | Epsilon: 1.000
Resetting the environment...
Step: 75439 | Episode Reward: 136.765 | Epsilon: 1.000
Resetting the environment...
Step: 75640 | Episode Reward: 139.922 | Epsilon: 1.000
Resetting the environment...
Step: 75891 | Episode Reward: 139.501 | Epsilon: 1.000
Resetting the environment...
Step: 76086 | Episode Reward: 140.548 | Epsilon: 1.000
Resetting the environ



Step: 115376 | Episode Reward: 133.203 | Epsilon: 1.000
Resetting the environment...
Step: 115576 | Episode Reward: 136.836 | Epsilon: 1.000
Resetting the environment...
Step: 115780 | Episode Reward: 142.934 | Epsilon: 1.000
Resetting the environment...
Step: 115971 | Episode Reward: 140.871 | Epsilon: 1.000
Resetting the environment...
Step: 116171 | Episode Reward: 142.280 | Epsilon: 1.000
Resetting the environment...
Step: 116369 | Episode Reward: 135.465 | Epsilon: 1.000
Resetting the environment...
Step: 116584 | Episode Reward: 140.940 | Epsilon: 1.000
Resetting the environment...
Step: 116798 | Episode Reward: 133.925 | Epsilon: 1.000
Resetting the environment...
Step: 116998 | Episode Reward: 146.835 | Epsilon: 1.000
Resetting the environment...
Step: 117180 | Episode Reward: 141.587 | Epsilon: 1.000
Resetting the environment...
Step: 117375 | Episode Reward: 145.281 | Epsilon: 1.000
Resetting the environment...
Step: 117573 | Episode Reward: 141.520 | Epsilon: 1.000
Resetting



Step: 181062 | Episode Reward: 123.651 | Epsilon: 1.000
Resetting the environment...
Step: 181267 | Episode Reward: 127.843 | Epsilon: 1.000
Resetting the environment...
Step: 181459 | Episode Reward: 142.902 | Epsilon: 1.000
Resetting the environment...
Step: 181649 | Episode Reward: 138.576 | Epsilon: 1.000
Resetting the environment...
Step: 181840 | Episode Reward: 132.889 | Epsilon: 1.000
Resetting the environment...
Step: 182047 | Episode Reward: 143.720 | Epsilon: 1.000
Resetting the environment...
Step: 182283 | Episode Reward: 142.907 | Epsilon: 1.000
Resetting the environment...
Step: 182517 | Episode Reward: 143.787 | Epsilon: 1.000
Resetting the environment...
Step: 182711 | Episode Reward: 137.641 | Epsilon: 1.000
Resetting the environment...
Step: 182925 | Episode Reward: 134.584 | Epsilon: 1.000
Resetting the environment...
Step: 183135 | Episode Reward: 134.769 | Epsilon: 1.000
Resetting the environment...
Step: 183325 | Episode Reward: 136.470 | Epsilon: 1.000
Resetting

In [6]:
# Select the exponential-moving-average (EMA) demand option and make sure the
# simple-moving-average (SMA) option is disabled.
base_params.update(
    demand_moving_average=False,     # SMA off
    demand_exp_moving_average=True,  # EMA on
)

In [7]:
# Train a Double DQN agent on an environment built from the current settings,
# then save the trained model.
# NOTE(review): with epsilon_decay_flag=False the training log below prints
# "Epsilon: 1.000" at every reported step -- confirm a constant epsilon is the
# intended setup for this run.
ddqn_ema_env = make_env()

ddqn_ema_hparams = dict(
    learning_rate=3e-4,
    epsilon_decay_flag=False,
    tensorboard_log="./wds_custom_logs/",
)
ddqn_ema_model = Double_DQN_Implementation(env=ddqn_ema_env, **ddqn_ema_hparams)

ddqn_ema_model.learn(total_timesteps=200_000)
ddqn_ema_model.save("ddqn-ema-no-epsilon-decay")

Train for 200000 steps
Resetting the environment...


  gym.logger.warn(
  gym.logger.warn(


Step: 201 | Episode Reward: 140.968 | Epsilon: 1.000
Resetting the environment...
Step: 415 | Episode Reward: 138.388 | Epsilon: 1.000
Resetting the environment...
Step: 619 | Episode Reward: 142.308 | Epsilon: 1.000
Resetting the environment...
Step: 815 | Episode Reward: 140.530 | Epsilon: 1.000
Resetting the environment...
Step: 1020 | Episode Reward: 130.931 | Epsilon: 1.000
Resetting the environment...
Step: 1222 | Episode Reward: 136.340 | Epsilon: 1.000
Resetting the environment...
Step: 1432 | Episode Reward: 139.263 | Epsilon: 1.000
Resetting the environment...
Step: 1639 | Episode Reward: 141.470 | Epsilon: 1.000
Resetting the environment...
Step: 1844 | Episode Reward: 136.801 | Epsilon: 1.000
Resetting the environment...
Step: 2039 | Episode Reward: 128.545 | Epsilon: 1.000
Resetting the environment...
Step: 2231 | Episode Reward: 139.150 | Epsilon: 1.000
Resetting the environment...
Step: 2436 | Episode Reward: 132.846 | Epsilon: 1.000
Resetting the environment...
Step: 26



Step: 13149 | Episode Reward: 129.707 | Epsilon: 1.000
Resetting the environment...
Step: 13380 | Episode Reward: 131.377 | Epsilon: 1.000
Resetting the environment...
Step: 13608 | Episode Reward: 134.064 | Epsilon: 1.000
Resetting the environment...
Step: 13792 | Episode Reward: 146.095 | Epsilon: 1.000
Resetting the environment...
Step: 13991 | Episode Reward: 142.833 | Epsilon: 1.000
Resetting the environment...
Step: 14197 | Episode Reward: 130.479 | Epsilon: 1.000
Resetting the environment...
Step: 14399 | Episode Reward: 138.125 | Epsilon: 1.000
Resetting the environment...
Step: 14613 | Episode Reward: 137.590 | Epsilon: 1.000
Resetting the environment...
Step: 14822 | Episode Reward: 143.109 | Epsilon: 1.000
Resetting the environment...
Step: 15018 | Episode Reward: 139.878 | Epsilon: 1.000
Resetting the environment...
Step: 15214 | Episode Reward: 139.274 | Epsilon: 1.000
Resetting the environment...
Step: 15417 | Episode Reward: 140.897 | Epsilon: 1.000
Resetting the environ



Step: 21736 | Episode Reward: 133.384 | Epsilon: 1.000
Resetting the environment...
Step: 21945 | Episode Reward: 138.688 | Epsilon: 1.000
Resetting the environment...
Step: 22153 | Episode Reward: 136.877 | Epsilon: 1.000
Resetting the environment...
Step: 22342 | Episode Reward: 144.885 | Epsilon: 1.000
Resetting the environment...
Step: 22540 | Episode Reward: 140.838 | Epsilon: 1.000
Resetting the environment...
Step: 22745 | Episode Reward: 141.895 | Epsilon: 1.000
Resetting the environment...
Step: 22953 | Episode Reward: 142.828 | Epsilon: 1.000
Resetting the environment...
Step: 23159 | Episode Reward: 140.139 | Epsilon: 1.000
Resetting the environment...
Step: 23364 | Episode Reward: 138.462 | Epsilon: 1.000
Resetting the environment...
Step: 23568 | Episode Reward: 132.635 | Epsilon: 1.000
Resetting the environment...
Step: 23805 | Episode Reward: 141.880 | Epsilon: 1.000
Resetting the environment...
Step: 24004 | Episode Reward: 137.927 | Epsilon: 1.000
Resetting the environ



Step: 59202 | Episode Reward: 137.571 | Epsilon: 1.000
Resetting the environment...
Step: 59405 | Episode Reward: 140.480 | Epsilon: 1.000
Resetting the environment...
Step: 59607 | Episode Reward: 134.172 | Epsilon: 1.000
Resetting the environment...
Step: 59829 | Episode Reward: 138.324 | Epsilon: 1.000
Resetting the environment...
Step: 60081 | Episode Reward: 138.620 | Epsilon: 1.000
Resetting the environment...
Step: 60294 | Episode Reward: 144.108 | Epsilon: 1.000
Resetting the environment...
Step: 60487 | Episode Reward: 140.921 | Epsilon: 1.000
Resetting the environment...
Step: 60748 | Episode Reward: 137.595 | Epsilon: 1.000
Resetting the environment...
Step: 60941 | Episode Reward: 143.406 | Epsilon: 1.000
Resetting the environment...
Step: 61135 | Episode Reward: 140.910 | Epsilon: 1.000
Resetting the environment...
Step: 61386 | Episode Reward: 140.432 | Epsilon: 1.000
Resetting the environment...
Step: 61582 | Episode Reward: 140.195 | Epsilon: 1.000
Resetting the environ



Step: 63642 | Episode Reward: 126.417 | Epsilon: 1.000
Resetting the environment...
Step: 63862 | Episode Reward: 143.774 | Epsilon: 1.000
Resetting the environment...
Step: 64050 | Episode Reward: 142.111 | Epsilon: 1.000
Resetting the environment...
Step: 64267 | Episode Reward: 135.848 | Epsilon: 1.000
Resetting the environment...
Step: 64473 | Episode Reward: 137.467 | Epsilon: 1.000
Resetting the environment...
Step: 64668 | Episode Reward: 141.640 | Epsilon: 1.000
Resetting the environment...
Step: 64870 | Episode Reward: 134.792 | Epsilon: 1.000
Resetting the environment...
Step: 65063 | Episode Reward: 140.721 | Epsilon: 1.000
Resetting the environment...
Step: 65257 | Episode Reward: 134.826 | Epsilon: 1.000
Resetting the environment...
Step: 65469 | Episode Reward: 140.429 | Epsilon: 1.000
Resetting the environment...
Step: 65675 | Episode Reward: 141.404 | Epsilon: 1.000
Resetting the environment...
Step: 65914 | Episode Reward: 125.828 | Epsilon: 1.000
Resetting the environ



Step: 68185 | Episode Reward: 127.535 | Epsilon: 1.000
Resetting the environment...
Step: 68387 | Episode Reward: 143.239 | Epsilon: 1.000
Resetting the environment...
Step: 68590 | Episode Reward: 143.119 | Epsilon: 1.000
Resetting the environment...
Step: 68791 | Episode Reward: 142.126 | Epsilon: 1.000
Resetting the environment...
Step: 68988 | Episode Reward: 135.324 | Epsilon: 1.000
Resetting the environment...
Step: 69235 | Episode Reward: 146.936 | Epsilon: 1.000
Resetting the environment...
Step: 69432 | Episode Reward: 142.640 | Epsilon: 1.000
Resetting the environment...
Step: 69641 | Episode Reward: 143.518 | Epsilon: 1.000
Resetting the environment...
Step: 69890 | Episode Reward: 130.738 | Epsilon: 1.000
Resetting the environment...
Step: 70091 | Episode Reward: 135.014 | Epsilon: 1.000
Resetting the environment...
Step: 70293 | Episode Reward: 138.110 | Epsilon: 1.000
Resetting the environment...
Step: 70497 | Episode Reward: 139.748 | Epsilon: 1.000
Resetting the environ



Step: 72350 | Episode Reward: 128.239 | Epsilon: 1.000
Resetting the environment...
Step: 72552 | Episode Reward: 137.930 | Epsilon: 1.000
Resetting the environment...
Step: 72739 | Episode Reward: 141.305 | Epsilon: 1.000
Resetting the environment...
Step: 72930 | Episode Reward: 147.410 | Epsilon: 1.000
Resetting the environment...
Step: 73130 | Episode Reward: 146.927 | Epsilon: 1.000
Resetting the environment...
Step: 73312 | Episode Reward: 144.885 | Epsilon: 1.000
Resetting the environment...




Step: 73570 | Episode Reward: 135.606 | Epsilon: 1.000
Resetting the environment...
Step: 73765 | Episode Reward: 147.254 | Epsilon: 1.000
Resetting the environment...
Step: 73958 | Episode Reward: 142.827 | Epsilon: 1.000
Resetting the environment...
Step: 74150 | Episode Reward: 143.154 | Epsilon: 1.000
Resetting the environment...
Step: 74407 | Episode Reward: 124.998 | Epsilon: 1.000
Resetting the environment...
Step: 74613 | Episode Reward: 137.822 | Epsilon: 1.000
Resetting the environment...
Step: 74804 | Episode Reward: 144.222 | Epsilon: 1.000
Resetting the environment...
Step: 74995 | Episode Reward: 145.445 | Epsilon: 1.000
Resetting the environment...
Step: 75197 | Episode Reward: 134.444 | Epsilon: 1.000
Resetting the environment...
Step: 75405 | Episode Reward: 136.902 | Epsilon: 1.000
Resetting the environment...
Step: 75607 | Episode Reward: 132.881 | Epsilon: 1.000
Resetting the environment...
Step: 75789 | Episode Reward: 146.902 | Epsilon: 1.000
Resetting the environ



Step: 109192 | Episode Reward: 133.900 | Epsilon: 1.000
Resetting the environment...
Step: 109391 | Episode Reward: 142.768 | Epsilon: 1.000
Resetting the environment...
Step: 109596 | Episode Reward: 145.480 | Epsilon: 1.000
Resetting the environment...
Step: 109787 | Episode Reward: 142.448 | Epsilon: 1.000
Resetting the environment...
Step: 110039 | Episode Reward: 126.968 | Epsilon: 1.000
Resetting the environment...




Step: 110292 | Episode Reward: 127.189 | Epsilon: 1.000
Resetting the environment...
Step: 110477 | Episode Reward: 138.993 | Epsilon: 1.000
Resetting the environment...
Step: 110680 | Episode Reward: 132.417 | Epsilon: 1.000
Resetting the environment...
Step: 110877 | Episode Reward: 141.513 | Epsilon: 1.000
Resetting the environment...
Step: 111067 | Episode Reward: 145.493 | Epsilon: 1.000
Resetting the environment...
Step: 111262 | Episode Reward: 139.717 | Epsilon: 1.000
Resetting the environment...
Step: 111496 | Episode Reward: 135.581 | Epsilon: 1.000
Resetting the environment...
Step: 111707 | Episode Reward: 138.388 | Epsilon: 1.000
Resetting the environment...
Step: 111925 | Episode Reward: 134.811 | Epsilon: 1.000
Resetting the environment...
Step: 112127 | Episode Reward: 135.570 | Epsilon: 1.000
Resetting the environment...
Step: 112328 | Episode Reward: 140.464 | Epsilon: 1.000
Resetting the environment...
Step: 112514 | Episode Reward: 135.873 | Epsilon: 1.000
Resetting



Step: 138763 | Episode Reward: 133.260 | Epsilon: 1.000
Resetting the environment...
Step: 138965 | Episode Reward: 143.864 | Epsilon: 1.000
Resetting the environment...
Step: 139160 | Episode Reward: 137.477 | Epsilon: 1.000
Resetting the environment...
Step: 139350 | Episode Reward: 144.603 | Epsilon: 1.000
Resetting the environment...
Step: 139547 | Episode Reward: 143.638 | Epsilon: 1.000
Resetting the environment...
Step: 139736 | Episode Reward: 142.970 | Epsilon: 1.000
Resetting the environment...
Step: 139930 | Episode Reward: 142.919 | Epsilon: 1.000
Resetting the environment...
Step: 140115 | Episode Reward: 142.329 | Epsilon: 1.000
Resetting the environment...
Step: 140300 | Episode Reward: 119.534 | Epsilon: 1.000
Resetting the environment...
Step: 140490 | Episode Reward: 146.493 | Epsilon: 1.000
Resetting the environment...
Step: 140690 | Episode Reward: 140.519 | Epsilon: 1.000
Resetting the environment...
Step: 140890 | Episode Reward: 145.439 | Epsilon: 1.000
Resetting



Step: 141299 | Episode Reward: 128.665 | Epsilon: 1.000
Resetting the environment...
Step: 141534 | Episode Reward: 134.260 | Epsilon: 1.000
Resetting the environment...
Step: 141758 | Episode Reward: 132.790 | Epsilon: 1.000
Resetting the environment...
Step: 142018 | Episode Reward: 131.472 | Epsilon: 1.000
Resetting the environment...
Step: 142217 | Episode Reward: 132.917 | Epsilon: 1.000
Resetting the environment...
Step: 142422 | Episode Reward: 137.027 | Epsilon: 1.000
Resetting the environment...
Step: 142670 | Episode Reward: 139.719 | Epsilon: 1.000
Resetting the environment...
Step: 142862 | Episode Reward: 139.745 | Epsilon: 1.000
Resetting the environment...
Step: 143050 | Episode Reward: 143.400 | Epsilon: 1.000
Resetting the environment...
Step: 143257 | Episode Reward: 141.543 | Epsilon: 1.000
Resetting the environment...
Step: 143464 | Episode Reward: 139.289 | Epsilon: 1.000
Resetting the environment...
Step: 143655 | Episode Reward: 144.536 | Epsilon: 1.000
Resetting



Step: 180462 | Episode Reward: 137.140 | Epsilon: 1.000
Resetting the environment...
Step: 180660 | Episode Reward: 141.347 | Epsilon: 1.000
Resetting the environment...
Step: 180850 | Episode Reward: 141.754 | Epsilon: 1.000
Resetting the environment...
Step: 181042 | Episode Reward: 136.672 | Epsilon: 1.000
Resetting the environment...
Step: 181225 | Episode Reward: 141.284 | Epsilon: 1.000
Resetting the environment...
Step: 181462 | Episode Reward: 120.003 | Epsilon: 1.000
Resetting the environment...
Step: 181669 | Episode Reward: 127.386 | Epsilon: 1.000
Resetting the environment...
Step: 181856 | Episode Reward: 143.809 | Epsilon: 1.000
Resetting the environment...
Step: 182056 | Episode Reward: 139.577 | Epsilon: 1.000
Resetting the environment...
Step: 182259 | Episode Reward: 130.185 | Epsilon: 1.000
Resetting the environment...
Step: 182470 | Episode Reward: 144.784 | Epsilon: 1.000
Resetting the environment...
Step: 182709 | Episode Reward: 140.310 | Epsilon: 1.000
Resetting



Step: 184181 | Episode Reward: 135.208 | Epsilon: 1.000
Resetting the environment...
Step: 184389 | Episode Reward: 140.040 | Epsilon: 1.000
Resetting the environment...
Step: 184589 | Episode Reward: 140.343 | Epsilon: 1.000
Resetting the environment...
Step: 184779 | Episode Reward: 135.000 | Epsilon: 1.000
Resetting the environment...
Step: 184971 | Episode Reward: 139.094 | Epsilon: 1.000
Resetting the environment...
Step: 185166 | Episode Reward: 147.443 | Epsilon: 1.000
Resetting the environment...
Step: 185360 | Episode Reward: 136.935 | Epsilon: 1.000
Resetting the environment...
Step: 185551 | Episode Reward: 139.646 | Epsilon: 1.000
Resetting the environment...
Step: 185753 | Episode Reward: 141.278 | Epsilon: 1.000
Resetting the environment...
Step: 185947 | Episode Reward: 137.270 | Epsilon: 1.000
Resetting the environment...
Step: 186145 | Episode Reward: 143.264 | Epsilon: 1.000
Resetting the environment...
Step: 186339 | Episode Reward: 146.224 | Epsilon: 1.000
Resetting