In [1]:
import os
import sys
import gym4real
import gymnasium as gym
from gymnasium import spaces
from gym4real.envs.wds.utils import parameter_generator
from gym4real.envs.wds.reward_scaling_wrapper import RewardScalingWrapper
import wntr
import wntr.sim
from DQN import DQN_Implementation, Double_DQN_Implementation

  from pkg_resources import resource_filename


In [2]:
# World description file, resolved against the kernel's current working
# directory -- the directory the notebook runs from therefore has to contain
# the gym4real/envs/wds/ folder.
config_path = os.path.join(
    os.getcwd(),
    "gym4real", "envs", "wds", "world_anytown.yaml",
)

# Baseline settings object; make_env() hands it to gym.make as `settings`.
# NOTE(review): the two time values are presumably in seconds (1 h step,
# 7 day horizon) -- confirm against parameter_generator.
base_params = parameter_generator(
    world_options=config_path,
    seed=42,
    hydraulic_step=60 * 60,       # == 3600
    duration=7 * 24 * 60 * 60,    # == 604800
)

In [3]:
# Environment using SMA (moving-average demand flag on, exponential one off).
# These assignments edit base_params in place; make_env() passes base_params
# to gym.make, so every environment created after this cell (and before the
# flags are changed again) is built with these values.
base_params['demand_moving_average'] = True  # Turn on SMA 
base_params['demand_exp_moving_average'] = False  # Turn off EMA 

In [4]:
def make_env(settings=None):
    """Build a reward-scaled 'gym4real/wds-v0' environment.

    Parameters
    ----------
    settings : optional
        Object forwarded to ``gym.make`` as the ``settings`` keyword.
        When omitted (the default), the notebook-level ``base_params`` is
        used, so existing ``make_env()`` calls behave exactly as before.
        Passing an explicit value lets a caller build an environment that
        does not depend on whatever ``base_params`` holds at call time.

    Returns
    -------
    The environment returned by ``gym.make`` wrapped in
    ``RewardScalingWrapper``.
    """
    if settings is None:
        # Looked up at call time, so in-place edits made to base_params
        # before this call are picked up, as in the original.
        settings = base_params

    env = gym.make('gym4real/wds-v0', settings=settings)
    return RewardScalingWrapper(env)

In [5]:
# DQN agent on an environment built from the current base_params
# (the SMA flags, when the cells are run top to bottom).
dqn_sma_env = make_env()
dqn_sma_model = DQN_Implementation(
    tensorboard_log="./wds_custom_logs/",
    learning_rate=3e-4,
    env=dqn_sma_env,
)

# Train, then save the result under the name "dqn-sma-no-norm".
dqn_sma_model.learn(total_timesteps=200_000)
dqn_sma_model.save("dqn-sma-no-norm")

  gym.logger.warn(
  gym.logger.warn(


Train for 200000 steps
Resetting the environment...
Step: 201 | Episode Reward: 130.265 | Epsilon: 1.000
Resetting the environment...
Step: 431 | Episode Reward: 131.700 | Epsilon: 0.990
Resetting the environment...
Step: 651 | Episode Reward: 131.631 | Epsilon: 0.980
Resetting the environment...
Step: 870 | Episode Reward: 131.704 | Epsilon: 0.970
Resetting the environment...
Step: 1090 | Episode Reward: 127.539 | Epsilon: 0.961
Resetting the environment...
Step: 1312 | Episode Reward: 125.820 | Epsilon: 0.951
Resetting the environment...
Step: 1533 | Episode Reward: 131.712 | Epsilon: 0.941
Resetting the environment...
Step: 1744 | Episode Reward: 106.155 | Epsilon: 0.932
Resetting the environment...
Step: 1970 | Episode Reward: 125.961 | Epsilon: 0.923
Resetting the environment...
Step: 2165 | Episode Reward: 98.942 | Epsilon: 0.914
Resetting the environment...
Step: 2391 | Episode Reward: 134.560 | Epsilon: 0.904
Resetting the environment...




Step: 2606 | Episode Reward: 122.401 | Epsilon: 0.895
Resetting the environment...
Step: 2826 | Episode Reward: 130.964 | Epsilon: 0.886
Resetting the environment...




Step: 3037 | Episode Reward: 122.160 | Epsilon: 0.878
Resetting the environment...
Step: 3240 | Episode Reward: 117.969 | Epsilon: 0.869
Resetting the environment...
Step: 3471 | Episode Reward: 93.811 | Epsilon: 0.860
Resetting the environment...
Step: 3691 | Episode Reward: 120.413 | Epsilon: 0.851
Resetting the environment...
Step: 3901 | Episode Reward: 131.225 | Epsilon: 0.843
Resetting the environment...
Step: 4110 | Episode Reward: 127.669 | Epsilon: 0.835
Resetting the environment...
Step: 4335 | Episode Reward: 117.439 | Epsilon: 0.826
Resetting the environment...
Step: 4544 | Episode Reward: 117.161 | Epsilon: 0.818
Resetting the environment...
Step: 4756 | Episode Reward: 131.395 | Epsilon: 0.810
Resetting the environment...
Step: 4966 | Episode Reward: 110.348 | Epsilon: 0.802
Resetting the environment...
Step: 5184 | Episode Reward: 131.786 | Epsilon: 0.794
Resetting the environment...
Step: 5416 | Episode Reward: 121.699 | Epsilon: 0.786
Resetting the environment...
Step:



Step: 8337 | Episode Reward: 113.626 | Epsilon: 0.689
Resetting the environment...
Step: 8554 | Episode Reward: 126.647 | Epsilon: 0.683
Resetting the environment...
Step: 8763 | Episode Reward: 136.186 | Epsilon: 0.676
Resetting the environment...
Step: 8989 | Episode Reward: 96.651 | Epsilon: 0.669
Resetting the environment...
Step: 9197 | Episode Reward: 121.302 | Epsilon: 0.662
Resetting the environment...
Step: 9417 | Episode Reward: 82.637 | Epsilon: 0.656
Resetting the environment...
Step: 9630 | Episode Reward: 112.559 | Epsilon: 0.649
Resetting the environment...
Step: 9842 | Episode Reward: 121.981 | Epsilon: 0.643
Resetting the environment...
Step: 10045 | Episode Reward: 118.996 | Epsilon: 0.636
Resetting the environment...
Step: 10247 | Episode Reward: 117.458 | Epsilon: 0.630
Resetting the environment...
Step: 10457 | Episode Reward: 134.224 | Epsilon: 0.624
Resetting the environment...
Step: 10669 | Episode Reward: 124.018 | Epsilon: 0.617
Resetting the environment...
St



Step: 11525 | Episode Reward: 87.345 | Epsilon: 0.593
Resetting the environment...
Step: 11723 | Episode Reward: 142.977 | Epsilon: 0.587
Resetting the environment...
Step: 11934 | Episode Reward: 124.315 | Epsilon: 0.581
Resetting the environment...




Step: 12216 | Episode Reward: 116.054 | Epsilon: 0.575
Resetting the environment...
Step: 12417 | Episode Reward: 134.186 | Epsilon: 0.570
Resetting the environment...
Step: 12722 | Episode Reward: 126.038 | Epsilon: 0.564
Resetting the environment...
Step: 12957 | Episode Reward: 121.008 | Epsilon: 0.558
Resetting the environment...
Step: 13171 | Episode Reward: 129.584 | Epsilon: 0.553
Resetting the environment...
Step: 13384 | Episode Reward: 132.404 | Epsilon: 0.547
Resetting the environment...
Step: 13604 | Episode Reward: 123.056 | Epsilon: 0.542
Resetting the environment...
Step: 13806 | Episode Reward: 128.686 | Epsilon: 0.536
Resetting the environment...
Step: 14047 | Episode Reward: 131.633 | Epsilon: 0.531
Resetting the environment...
Step: 14259 | Episode Reward: 133.970 | Epsilon: 0.526
Resetting the environment...
Step: 14469 | Episode Reward: 136.383 | Epsilon: 0.520
Resetting the environment...
Step: 14664 | Episode Reward: 145.107 | Epsilon: 0.515
Resetting the environ



Step: 22625 | Episode Reward: 109.453 | Epsilon: 0.348
Resetting the environment...
Step: 22845 | Episode Reward: 124.907 | Epsilon: 0.345
Resetting the environment...
Step: 23050 | Episode Reward: 134.867 | Epsilon: 0.341
Resetting the environment...
Step: 23258 | Episode Reward: 146.758 | Epsilon: 0.338
Resetting the environment...
Step: 23465 | Episode Reward: 144.676 | Epsilon: 0.334
Resetting the environment...
Step: 23650 | Episode Reward: 150.552 | Epsilon: 0.331
Resetting the environment...
Step: 23859 | Episode Reward: 139.468 | Epsilon: 0.328
Resetting the environment...
Step: 24057 | Episode Reward: 146.245 | Epsilon: 0.324
Resetting the environment...
Step: 24257 | Episode Reward: 137.964 | Epsilon: 0.321
Resetting the environment...
Step: 24459 | Episode Reward: 133.983 | Epsilon: 0.318
Resetting the environment...
Step: 24685 | Episode Reward: 149.567 | Epsilon: 0.315
Resetting the environment...
Step: 24882 | Episode Reward: 138.126 | Epsilon: 0.312
Resetting the environ



Step: 40888 | Episode Reward: 69.608 | Epsilon: 0.133
Resetting the environment...
Step: 41096 | Episode Reward: 139.590 | Epsilon: 0.131
Resetting the environment...
Step: 41308 | Episode Reward: 147.770 | Epsilon: 0.130
Resetting the environment...
Step: 41491 | Episode Reward: 139.107 | Epsilon: 0.129
Resetting the environment...
Step: 41692 | Episode Reward: 139.020 | Epsilon: 0.127
Resetting the environment...
Step: 41897 | Episode Reward: 141.166 | Epsilon: 0.126
Resetting the environment...
Step: 42091 | Episode Reward: 151.932 | Epsilon: 0.125
Resetting the environment...
Step: 42318 | Episode Reward: 123.945 | Epsilon: 0.124
Resetting the environment...
Step: 42495 | Episode Reward: 149.813 | Epsilon: 0.122
Resetting the environment...
Step: 42693 | Episode Reward: 139.812 | Epsilon: 0.121
Resetting the environment...
Step: 42889 | Episode Reward: 140.213 | Epsilon: 0.120
Resetting the environment...
Step: 43087 | Episode Reward: 143.041 | Epsilon: 0.119
Resetting the environm



Step: 70091 | Episode Reward: 83.796 | Epsilon: 0.026
Resetting the environment...
Step: 70288 | Episode Reward: 121.986 | Epsilon: 0.026
Resetting the environment...
Step: 70475 | Episode Reward: 99.720 | Epsilon: 0.026
Resetting the environment...
Step: 70667 | Episode Reward: 136.550 | Epsilon: 0.026
Resetting the environment...
Step: 70856 | Episode Reward: 124.659 | Epsilon: 0.025
Resetting the environment...
Step: 71051 | Episode Reward: 111.351 | Epsilon: 0.025
Resetting the environment...
Step: 71240 | Episode Reward: 124.489 | Epsilon: 0.025
Resetting the environment...
Step: 71429 | Episode Reward: 137.624 | Epsilon: 0.025
Resetting the environment...
Step: 71622 | Episode Reward: 126.132 | Epsilon: 0.024
Resetting the environment...
Step: 71821 | Episode Reward: 135.615 | Epsilon: 0.024
Resetting the environment...
Step: 72006 | Episode Reward: 126.120 | Epsilon: 0.024
Resetting the environment...
Step: 72191 | Episode Reward: 145.628 | Epsilon: 0.024
Resetting the environme

In [6]:
# Double-DQN agent on a fresh environment built from the current base_params
# (the SMA flags, when the cells are run top to bottom).
ddqn_sma_env = make_env()
ddqn_sma_model = Double_DQN_Implementation(
    tensorboard_log="./wds_custom_logs/",
    learning_rate=1e-4,
    env=ddqn_sma_env,
)

# Train, then save the result under the name "ddqn-sma-no-norm".
ddqn_sma_model.learn(total_timesteps=200_000)
ddqn_sma_model.save("ddqn-sma-no-norm")

Train for 200000 steps
Resetting the environment...


  gym.logger.warn(
  gym.logger.warn(


Step: 222 | Episode Reward: 131.817 | Epsilon: 1.000
Resetting the environment...
Step: 455 | Episode Reward: 130.868 | Epsilon: 0.990
Resetting the environment...
Step: 676 | Episode Reward: 123.484 | Epsilon: 0.980
Resetting the environment...
Step: 889 | Episode Reward: 128.256 | Epsilon: 0.970
Resetting the environment...
Step: 1120 | Episode Reward: 129.377 | Epsilon: 0.961
Resetting the environment...
Step: 1346 | Episode Reward: 126.577 | Epsilon: 0.951
Resetting the environment...
Step: 1568 | Episode Reward: 125.253 | Epsilon: 0.941
Resetting the environment...
Step: 1793 | Episode Reward: 128.328 | Epsilon: 0.932
Resetting the environment...
Step: 2019 | Episode Reward: 127.085 | Epsilon: 0.923
Resetting the environment...
Step: 2214 | Episode Reward: 110.556 | Epsilon: 0.914
Resetting the environment...
Step: 2429 | Episode Reward: 129.177 | Epsilon: 0.904
Resetting the environment...




Step: 2653 | Episode Reward: 114.669 | Epsilon: 0.895
Resetting the environment...
Step: 2877 | Episode Reward: 131.775 | Epsilon: 0.886
Resetting the environment...
Step: 3101 | Episode Reward: 124.251 | Epsilon: 0.878
Resetting the environment...
Step: 3301 | Episode Reward: 119.267 | Epsilon: 0.869
Resetting the environment...
Step: 3524 | Episode Reward: 96.375 | Epsilon: 0.860
Resetting the environment...
Step: 3743 | Episode Reward: 125.997 | Epsilon: 0.851
Resetting the environment...
Step: 3953 | Episode Reward: 131.562 | Epsilon: 0.843
Resetting the environment...
Step: 4171 | Episode Reward: 137.738 | Epsilon: 0.835
Resetting the environment...
Step: 4395 | Episode Reward: 120.959 | Epsilon: 0.826
Resetting the environment...
Step: 4607 | Episode Reward: 121.011 | Epsilon: 0.818
Resetting the environment...
Step: 4812 | Episode Reward: 135.725 | Epsilon: 0.810
Resetting the environment...
Step: 5054 | Episode Reward: 107.221 | Epsilon: 0.802
Resetting the environment...
Step:



Step: 11724 | Episode Reward: 102.709 | Epsilon: 0.593
Resetting the environment...
Step: 11928 | Episode Reward: 145.081 | Epsilon: 0.587
Resetting the environment...
Step: 12135 | Episode Reward: 124.155 | Epsilon: 0.581
Resetting the environment...




Step: 12351 | Episode Reward: 89.066 | Epsilon: 0.575
Resetting the environment...
Step: 12560 | Episode Reward: 129.835 | Epsilon: 0.570
Resetting the environment...
Step: 12787 | Episode Reward: 86.647 | Epsilon: 0.564
Resetting the environment...
Step: 13003 | Episode Reward: 107.082 | Epsilon: 0.558
Resetting the environment...
Step: 13200 | Episode Reward: 101.483 | Epsilon: 0.553
Resetting the environment...
Step: 13407 | Episode Reward: 125.947 | Epsilon: 0.547
Resetting the environment...
Step: 13624 | Episode Reward: 116.117 | Epsilon: 0.542
Resetting the environment...
Step: 13836 | Episode Reward: 116.537 | Epsilon: 0.536
Resetting the environment...




Step: 14046 | Episode Reward: 75.463 | Epsilon: 0.531
Resetting the environment...
Step: 14264 | Episode Reward: 91.296 | Epsilon: 0.526
Resetting the environment...
Step: 14481 | Episode Reward: 79.836 | Epsilon: 0.520
Resetting the environment...
Step: 14685 | Episode Reward: 123.255 | Epsilon: 0.515
Resetting the environment...
Step: 14888 | Episode Reward: 133.114 | Epsilon: 0.510
Resetting the environment...
Step: 15101 | Episode Reward: 124.815 | Epsilon: 0.505
Resetting the environment...
Step: 15312 | Episode Reward: 119.830 | Epsilon: 0.500
Resetting the environment...
Step: 15527 | Episode Reward: 87.353 | Epsilon: 0.495
Resetting the environment...
Step: 15735 | Episode Reward: 143.002 | Epsilon: 0.490
Resetting the environment...
Step: 15947 | Episode Reward: 107.586 | Epsilon: 0.485
Resetting the environment...
Step: 16147 | Episode Reward: 110.690 | Epsilon: 0.480
Resetting the environment...
Step: 16366 | Episode Reward: 127.867 | Epsilon: 0.475
Resetting the environment



Step: 22555 | Episode Reward: 93.155 | Epsilon: 0.352
Resetting the environment...
Step: 22803 | Episode Reward: 124.587 | Epsilon: 0.348
Resetting the environment...
Step: 23026 | Episode Reward: 118.040 | Epsilon: 0.345
Resetting the environment...
Step: 23235 | Episode Reward: 125.626 | Epsilon: 0.341
Resetting the environment...
Step: 23433 | Episode Reward: 131.529 | Epsilon: 0.338
Resetting the environment...
Step: 23657 | Episode Reward: 128.043 | Epsilon: 0.334
Resetting the environment...
Step: 23871 | Episode Reward: 108.724 | Epsilon: 0.331
Resetting the environment...
Step: 24085 | Episode Reward: 134.071 | Epsilon: 0.328
Resetting the environment...
Step: 24282 | Episode Reward: 87.700 | Epsilon: 0.324
Resetting the environment...
Step: 24486 | Episode Reward: 126.265 | Epsilon: 0.321
Resetting the environment...
Step: 24688 | Episode Reward: 111.785 | Epsilon: 0.318
Resetting the environment...
Step: 24883 | Episode Reward: 84.392 | Epsilon: 0.315
Resetting the environmen



Step: 41613 | Episode Reward: 143.263 | Epsilon: 0.133
Resetting the environment...
Step: 41800 | Episode Reward: 145.463 | Epsilon: 0.131
Resetting the environment...
Step: 41988 | Episode Reward: 150.191 | Epsilon: 0.130
Resetting the environment...
Step: 42176 | Episode Reward: 138.754 | Epsilon: 0.129
Resetting the environment...
Step: 42355 | Episode Reward: 140.692 | Epsilon: 0.127
Resetting the environment...
Step: 42536 | Episode Reward: 141.836 | Epsilon: 0.126
Resetting the environment...
Step: 42716 | Episode Reward: 153.695 | Epsilon: 0.125
Resetting the environment...
Step: 42888 | Episode Reward: 148.642 | Epsilon: 0.124
Resetting the environment...
Step: 43071 | Episode Reward: 151.281 | Epsilon: 0.122
Resetting the environment...
Step: 43263 | Episode Reward: 145.055 | Epsilon: 0.121
Resetting the environment...
Step: 43454 | Episode Reward: 145.045 | Epsilon: 0.120
Resetting the environment...
Step: 43638 | Episode Reward: 147.927 | Epsilon: 0.119
Resetting the environ

In [7]:
# Environment using EMA (exponential moving-average demand flag on, plain
# moving-average flag off).
# These assignments overwrite the flags on base_params in place; make_env()
# passes base_params to gym.make, so environments created from here on are
# built with these values.
base_params['demand_moving_average'] = False  # Turn off SMA 
base_params['demand_exp_moving_average'] = True  # Turn on EMA 

In [8]:
# DQN agent on an environment built from the current base_params
# (the EMA flags, when the cells are run top to bottom).
dqn_ema_env = make_env()
dqn_ema_model = DQN_Implementation(
    tensorboard_log="./wds_custom_logs/",
    learning_rate=3e-4,
    env=dqn_ema_env,
)

# Train, then save the result under the name "dqn-ema-no-norm".
dqn_ema_model.learn(total_timesteps=200_000)
dqn_ema_model.save("dqn-ema-no-norm")

Train for 200000 steps
Resetting the environment...


  gym.logger.warn(
  gym.logger.warn(


Step: 205 | Episode Reward: 126.417 | Epsilon: 1.000
Resetting the environment...
Step: 436 | Episode Reward: 120.586 | Epsilon: 0.990
Resetting the environment...
Step: 658 | Episode Reward: 122.698 | Epsilon: 0.980
Resetting the environment...
Step: 876 | Episode Reward: 130.277 | Epsilon: 0.970
Resetting the environment...
Step: 1098 | Episode Reward: 127.670 | Epsilon: 0.961
Resetting the environment...
Step: 1319 | Episode Reward: 125.816 | Epsilon: 0.951
Resetting the environment...
Step: 1534 | Episode Reward: 128.287 | Epsilon: 0.941
Resetting the environment...
Step: 1767 | Episode Reward: 109.964 | Epsilon: 0.932
Resetting the environment...
Step: 1987 | Episode Reward: 131.726 | Epsilon: 0.923
Resetting the environment...
Step: 2184 | Episode Reward: 116.994 | Epsilon: 0.914
Resetting the environment...
Step: 2393 | Episode Reward: 132.780 | Epsilon: 0.904
Resetting the environment...
Step: 2598 | Episode Reward: 124.371 | Epsilon: 0.895
Resetting the environment...
Step: 28



Step: 11552 | Episode Reward: 108.612 | Epsilon: 0.593
Resetting the environment...
Step: 11759 | Episode Reward: 147.826 | Epsilon: 0.587
Resetting the environment...
Step: 11977 | Episode Reward: 120.846 | Epsilon: 0.581
Resetting the environment...




Step: 12204 | Episode Reward: 91.905 | Epsilon: 0.575
Resetting the environment...
Step: 12408 | Episode Reward: 131.581 | Epsilon: 0.570
Resetting the environment...
Step: 12633 | Episode Reward: 89.734 | Epsilon: 0.564
Resetting the environment...
Step: 12843 | Episode Reward: 104.873 | Epsilon: 0.558
Resetting the environment...
Step: 13050 | Episode Reward: 117.919 | Epsilon: 0.553
Resetting the environment...
Step: 13264 | Episode Reward: 132.538 | Epsilon: 0.547
Resetting the environment...
Step: 13481 | Episode Reward: 108.973 | Epsilon: 0.542
Resetting the environment...
Step: 13692 | Episode Reward: 97.632 | Epsilon: 0.536
Resetting the environment...




Step: 13933 | Episode Reward: 94.220 | Epsilon: 0.531
Resetting the environment...
Step: 14181 | Episode Reward: 92.174 | Epsilon: 0.526
Resetting the environment...
Step: 14452 | Episode Reward: 119.612 | Epsilon: 0.520
Resetting the environment...
Step: 14664 | Episode Reward: 140.901 | Epsilon: 0.515
Resetting the environment...
Step: 14875 | Episode Reward: 132.434 | Epsilon: 0.510
Resetting the environment...
Step: 15081 | Episode Reward: 125.882 | Epsilon: 0.505
Resetting the environment...
Step: 15297 | Episode Reward: 132.467 | Epsilon: 0.500
Resetting the environment...
Step: 15514 | Episode Reward: 117.798 | Epsilon: 0.495
Resetting the environment...
Step: 15719 | Episode Reward: 138.837 | Epsilon: 0.490
Resetting the environment...
Step: 15932 | Episode Reward: 125.272 | Epsilon: 0.485
Resetting the environment...
Step: 16135 | Episode Reward: 132.633 | Epsilon: 0.480
Resetting the environment...
Step: 16352 | Episode Reward: 132.786 | Epsilon: 0.475
Resetting the environme



Step: 22777 | Episode Reward: 119.525 | Epsilon: 0.352
Resetting the environment...
Step: 23019 | Episode Reward: 134.813 | Epsilon: 0.348
Resetting the environment...
Step: 23232 | Episode Reward: 134.214 | Epsilon: 0.345
Resetting the environment...
Step: 23434 | Episode Reward: 136.596 | Epsilon: 0.341
Resetting the environment...
Step: 23630 | Episode Reward: 143.918 | Epsilon: 0.338
Resetting the environment...
Step: 23828 | Episode Reward: 139.516 | Epsilon: 0.334
Resetting the environment...
Step: 24034 | Episode Reward: 142.017 | Epsilon: 0.331
Resetting the environment...
Step: 24228 | Episode Reward: 143.103 | Epsilon: 0.328
Resetting the environment...
Step: 24429 | Episode Reward: 142.174 | Epsilon: 0.324
Resetting the environment...
Step: 24624 | Episode Reward: 140.583 | Epsilon: 0.321
Resetting the environment...
Step: 24818 | Episode Reward: 137.190 | Epsilon: 0.318
Resetting the environment...
Step: 25025 | Episode Reward: 152.379 | Epsilon: 0.315
Resetting the environ



Step: 105489 | Episode Reward: 114.296 | Epsilon: 0.010
Resetting the environment...
Step: 105680 | Episode Reward: 126.924 | Epsilon: 0.010
Resetting the environment...
Step: 105869 | Episode Reward: 102.702 | Epsilon: 0.010
Resetting the environment...
Step: 106058 | Episode Reward: 112.107 | Epsilon: 0.010
Resetting the environment...
Step: 106239 | Episode Reward: 145.721 | Epsilon: 0.010
Resetting the environment...
Step: 106408 | Episode Reward: 152.802 | Epsilon: 0.010
Resetting the environment...
Step: 106593 | Episode Reward: 150.549 | Epsilon: 0.010
Resetting the environment...
Step: 106778 | Episode Reward: 143.648 | Epsilon: 0.010
Resetting the environment...
Step: 106969 | Episode Reward: 146.744 | Epsilon: 0.010
Resetting the environment...
Step: 107157 | Episode Reward: 148.527 | Epsilon: 0.010
Resetting the environment...
Step: 107356 | Episode Reward: 147.194 | Epsilon: 0.010
Resetting the environment...
Step: 107548 | Episode Reward: 149.868 | Epsilon: 0.010
Resetting

In [9]:
# Double-DQN agent on a fresh environment built from the current base_params
# (the EMA flags, when the cells are run top to bottom).
ddqn_ema_env = make_env()
ddqn_ema_model = Double_DQN_Implementation(
    tensorboard_log="./wds_custom_logs/",
    learning_rate=1e-4,
    env=ddqn_ema_env,
)

# Train, then save the result under the name "ddqn-ema-no-norm".
ddqn_ema_model.learn(total_timesteps=200_000)
ddqn_ema_model.save("ddqn-ema-no-norm")

Train for 200000 steps
Resetting the environment...


  gym.logger.warn(
  gym.logger.warn(


Step: 213 | Episode Reward: 127.336 | Epsilon: 1.000
Resetting the environment...
Step: 434 | Episode Reward: 129.412 | Epsilon: 0.990
Resetting the environment...
Step: 652 | Episode Reward: 126.661 | Epsilon: 0.980
Resetting the environment...
Step: 877 | Episode Reward: 132.330 | Epsilon: 0.970
Resetting the environment...
Step: 1095 | Episode Reward: 123.253 | Epsilon: 0.961
Resetting the environment...
Step: 1322 | Episode Reward: 123.254 | Epsilon: 0.951
Resetting the environment...
Step: 1555 | Episode Reward: 134.047 | Epsilon: 0.941
Resetting the environment...
Step: 1789 | Episode Reward: 113.872 | Epsilon: 0.932
Resetting the environment...
Step: 2019 | Episode Reward: 119.848 | Epsilon: 0.923
Resetting the environment...
Step: 2218 | Episode Reward: 118.757 | Epsilon: 0.914
Resetting the environment...
Step: 2434 | Episode Reward: 129.823 | Epsilon: 0.904
Resetting the environment...
Step: 2655 | Episode Reward: 119.392 | Epsilon: 0.895
Resetting the environment...
Step: 28



Step: 11582 | Episode Reward: 128.818 | Epsilon: 0.593
Resetting the environment...
Step: 11790 | Episode Reward: 144.745 | Epsilon: 0.587
Resetting the environment...
Step: 11996 | Episode Reward: 133.090 | Epsilon: 0.581
Resetting the environment...




Step: 12269 | Episode Reward: 121.118 | Epsilon: 0.575
Resetting the environment...
Step: 12475 | Episode Reward: 136.516 | Epsilon: 0.570
Resetting the environment...
Step: 12705 | Episode Reward: 95.383 | Epsilon: 0.564
Resetting the environment...
Step: 12924 | Episode Reward: 115.284 | Epsilon: 0.558
Resetting the environment...
Step: 13135 | Episode Reward: 108.333 | Epsilon: 0.553
Resetting the environment...
Step: 13344 | Episode Reward: 122.358 | Epsilon: 0.547
Resetting the environment...
Step: 13565 | Episode Reward: 114.072 | Epsilon: 0.542
Resetting the environment...
Step: 13772 | Episode Reward: 122.119 | Epsilon: 0.536
Resetting the environment...




Step: 13982 | Episode Reward: 78.375 | Epsilon: 0.531
Resetting the environment...
Step: 14199 | Episode Reward: 90.082 | Epsilon: 0.526
Resetting the environment...
Step: 14427 | Episode Reward: 126.829 | Epsilon: 0.520
Resetting the environment...
Step: 14624 | Episode Reward: 141.599 | Epsilon: 0.515
Resetting the environment...
Step: 14833 | Episode Reward: 135.756 | Epsilon: 0.510
Resetting the environment...
Step: 15047 | Episode Reward: 130.160 | Epsilon: 0.505
Resetting the environment...
Step: 15264 | Episode Reward: 130.352 | Epsilon: 0.500
Resetting the environment...
Step: 15486 | Episode Reward: 111.844 | Epsilon: 0.495
Resetting the environment...
Step: 15686 | Episode Reward: 135.703 | Epsilon: 0.490
Resetting the environment...
Step: 15882 | Episode Reward: 118.460 | Epsilon: 0.485
Resetting the environment...
Step: 16093 | Episode Reward: 111.894 | Epsilon: 0.480
Resetting the environment...
Step: 16310 | Episode Reward: 129.355 | Epsilon: 0.475
Resetting the environme



Step: 22807 | Episode Reward: 94.433 | Epsilon: 0.348
Resetting the environment...
Step: 23027 | Episode Reward: 120.582 | Epsilon: 0.345
Resetting the environment...
Step: 23238 | Episode Reward: 133.295 | Epsilon: 0.341
Resetting the environment...
Step: 23431 | Episode Reward: 134.381 | Epsilon: 0.338
Resetting the environment...
Step: 23641 | Episode Reward: 121.610 | Epsilon: 0.334
Resetting the environment...
Step: 23848 | Episode Reward: 91.237 | Epsilon: 0.331
Resetting the environment...
Step: 24055 | Episode Reward: 132.441 | Epsilon: 0.328
Resetting the environment...
Step: 24266 | Episode Reward: 117.370 | Epsilon: 0.324
Resetting the environment...
Step: 24487 | Episode Reward: 111.364 | Epsilon: 0.321
Resetting the environment...
Step: 24720 | Episode Reward: 124.122 | Epsilon: 0.318
Resetting the environment...
Step: 24931 | Episode Reward: 89.125 | Epsilon: 0.315
Resetting the environment...
Step: 25154 | Episode Reward: 125.645 | Epsilon: 0.312
Resetting the environmen



Step: 37114 | Episode Reward: 117.168 | Epsilon: 0.176
Resetting the environment...
Step: 37348 | Episode Reward: 97.379 | Epsilon: 0.174
Resetting the environment...
Step: 37546 | Episode Reward: 145.151 | Epsilon: 0.172
Resetting the environment...
Step: 37753 | Episode Reward: 132.443 | Epsilon: 0.171
Resetting the environment...
Step: 37962 | Episode Reward: 134.352 | Epsilon: 0.169
Resetting the environment...
Step: 38179 | Episode Reward: 131.487 | Epsilon: 0.167
Resetting the environment...
Step: 38402 | Episode Reward: 126.553 | Epsilon: 0.165
Resetting the environment...
Step: 38616 | Episode Reward: 141.357 | Epsilon: 0.164
Resetting the environment...
Step: 38830 | Episode Reward: 128.426 | Epsilon: 0.162
Resetting the environment...
Step: 39034 | Episode Reward: 109.233 | Epsilon: 0.161
Resetting the environment...
Step: 39220 | Episode Reward: 125.418 | Epsilon: 0.159
Resetting the environment...
Step: 39417 | Episode Reward: 147.779 | Epsilon: 0.157
Resetting the environm



Step: 40896 | Episode Reward: 110.213 | Epsilon: 0.147
Resetting the environment...
Step: 41098 | Episode Reward: 139.817 | Epsilon: 0.145
Resetting the environment...
Step: 41295 | Episode Reward: 130.949 | Epsilon: 0.144
Resetting the environment...
Step: 41550 | Episode Reward: 107.966 | Epsilon: 0.142
Resetting the environment...
Step: 41764 | Episode Reward: 122.885 | Epsilon: 0.141
Resetting the environment...
Step: 41972 | Episode Reward: 131.889 | Epsilon: 0.139
Resetting the environment...
Step: 42178 | Episode Reward: 137.976 | Epsilon: 0.138
Resetting the environment...
Step: 42386 | Episode Reward: 135.996 | Epsilon: 0.137
Resetting the environment...
Step: 42594 | Episode Reward: 134.401 | Epsilon: 0.135
Resetting the environment...
Step: 42787 | Episode Reward: 92.583 | Epsilon: 0.134
Resetting the environment...




Step: 42981 | Episode Reward: 92.655 | Epsilon: 0.133
Resetting the environment...
Step: 43177 | Episode Reward: 140.366 | Epsilon: 0.131
Resetting the environment...
Step: 43385 | Episode Reward: 138.148 | Epsilon: 0.130
Resetting the environment...
Step: 43572 | Episode Reward: 124.538 | Epsilon: 0.129
Resetting the environment...
Step: 43781 | Episode Reward: 135.324 | Epsilon: 0.127
Resetting the environment...
Step: 43983 | Episode Reward: 135.281 | Epsilon: 0.126
Resetting the environment...
Step: 44175 | Episode Reward: 114.190 | Epsilon: 0.125
Resetting the environment...
Step: 44389 | Episode Reward: 107.879 | Epsilon: 0.124
Resetting the environment...
Step: 44594 | Episode Reward: 117.710 | Epsilon: 0.122
Resetting the environment...
Step: 44803 | Episode Reward: 138.660 | Epsilon: 0.121
Resetting the environment...
Step: 45010 | Episode Reward: 144.544 | Epsilon: 0.120
Resetting the environment...
Step: 45223 | Episode Reward: 138.779 | Epsilon: 0.119
Resetting the environm