In [1]:
import os
import sys
import gym4real
import gymnasium as gym
from gymnasium import spaces
from gym4real.envs.wds.utils import parameter_generator
from gym4real.envs.wds.reward_scaling_wrapper import RewardScalingWrapper
import wntr
import wntr.sim
from DQN import DQN_Implementation, Double_DQN_Implementation

  from pkg_resources import resource_filename


In [2]:
# Path to the Anytown world definition, resolved against the current working
# directory (so the notebook must be launched from the repository root).
config_path = os.path.join(
    os.getcwd(), "gym4real", "envs", "wds", "world_anytown.yaml"
)

# Baseline settings consumed by make_env() for every experiment.
# NOTE(review): hydraulic_step/duration look like seconds (3600 = 1 h,
# 604800 = 7 days) — confirm against parameter_generator.
base_params = parameter_generator(
    world_options=config_path,
    hydraulic_step=3600,
    duration=604800,
    seed=42,
)

In [3]:
# Configure the shared settings for the SMA experiments:
# moving-average demand (SMA) on, exponential moving-average demand (EMA) off.
base_params.update({
    'demand_moving_average': True,
    'demand_exp_moving_average': False,
})

In [4]:
def make_env(settings=None):
    """Create the reward-scaled ``'gym4real/wds-v0'`` environment.

    Args:
        settings: Optional settings mapping forwarded to ``gym.make`` through
            its ``settings`` keyword. When omitted, the notebook-level
            ``base_params`` is used, which matches the previous behaviour of
            calling ``make_env()`` with no arguments.

    Returns:
        The environment returned by ``gym.make`` wrapped in
        ``RewardScalingWrapper``.
    """
    import copy  # local import so this cell does not depend on the import cell

    source = base_params if settings is None else settings
    # Pass a shallow snapshot rather than the shared object: other cells
    # reassign keys on ``base_params`` (e.g. switching SMA to EMA), and a
    # snapshot keeps those later edits from reaching an environment that was
    # already built. Whether the environment retains a reference to the
    # mapping is not visible from this notebook, so this is defensive.
    env = gym.make('gym4real/wds-v0', settings=copy.copy(source))
    return RewardScalingWrapper(env)

In [5]:
# DQN run on an environment built from the current base_params (SMA variant).
dqn_sma_env = make_env()
dqn_sma_model = DQN_Implementation(
    tensorboard_log="./wds_custom_logs/",
    learning_rate=3e-4,
    env=dqn_sma_env,
)

# Train for 300k timesteps, then store the agent under the name "dqn-sma".
dqn_sma_model.learn(total_timesteps=300000)
dqn_sma_model.save("dqn-sma")

  gym.logger.warn(
  gym.logger.warn(


Train for 300000 steps
Resetting the environment...
Step: 205 | Episode Reward: 130.347 | Epsilon: 1.000
Resetting the environment...
Step: 430 | Episode Reward: 129.364 | Epsilon: 0.995
Resetting the environment...
Step: 656 | Episode Reward: 131.822 | Epsilon: 0.990
Resetting the environment...
Step: 878 | Episode Reward: 126.743 | Epsilon: 0.985
Resetting the environment...
Step: 1089 | Episode Reward: 126.505 | Epsilon: 0.980
Resetting the environment...
Step: 1312 | Episode Reward: 125.520 | Epsilon: 0.975
Resetting the environment...
Step: 1533 | Episode Reward: 131.591 | Epsilon: 0.970
Resetting the environment...
Step: 1756 | Episode Reward: 110.661 | Epsilon: 0.966
Resetting the environment...
Step: 1985 | Episode Reward: 121.154 | Epsilon: 0.961
Resetting the environment...
Step: 2180 | Episode Reward: 106.839 | Epsilon: 0.956
Resetting the environment...
Step: 2399 | Episode Reward: 123.693 | Epsilon: 0.951
Resetting the environment...




Step: 2624 | Episode Reward: 119.099 | Epsilon: 0.946
Resetting the environment...
Step: 2849 | Episode Reward: 129.741 | Epsilon: 0.942
Resetting the environment...
Step: 3059 | Episode Reward: 123.640 | Epsilon: 0.937
Resetting the environment...
Step: 3258 | Episode Reward: 115.674 | Epsilon: 0.932
Resetting the environment...
Step: 3498 | Episode Reward: 100.453 | Epsilon: 0.928
Resetting the environment...
Step: 3714 | Episode Reward: 118.344 | Epsilon: 0.923
Resetting the environment...
Step: 3922 | Episode Reward: 128.170 | Epsilon: 0.918
Resetting the environment...
Step: 4136 | Episode Reward: 133.838 | Epsilon: 0.914
Resetting the environment...
Step: 4354 | Episode Reward: 124.782 | Epsilon: 0.909
Resetting the environment...
Step: 4578 | Episode Reward: 118.540 | Epsilon: 0.905
Resetting the environment...
Step: 4785 | Episode Reward: 129.927 | Epsilon: 0.900
Resetting the environment...
Step: 5018 | Episode Reward: 102.796 | Epsilon: 0.896
Resetting the environment...
Step



Step: 8383 | Episode Reward: 118.555 | Epsilon: 0.831
Resetting the environment...
Step: 8576 | Episode Reward: 133.120 | Epsilon: 0.827
Resetting the environment...
Step: 8790 | Episode Reward: 129.235 | Epsilon: 0.822
Resetting the environment...
Step: 9016 | Episode Reward: 99.356 | Epsilon: 0.818
Resetting the environment...
Step: 9231 | Episode Reward: 125.503 | Epsilon: 0.814
Resetting the environment...
Step: 9450 | Episode Reward: 81.089 | Epsilon: 0.810
Resetting the environment...
Step: 9662 | Episode Reward: 118.397 | Epsilon: 0.806
Resetting the environment...
Step: 9889 | Episode Reward: 122.680 | Epsilon: 0.802
Resetting the environment...
Step: 10101 | Episode Reward: 124.075 | Epsilon: 0.798
Resetting the environment...
Step: 10300 | Episode Reward: 124.980 | Epsilon: 0.794
Resetting the environment...
Step: 10506 | Episode Reward: 135.203 | Epsilon: 0.790
Resetting the environment...
Step: 10735 | Episode Reward: 121.486 | Epsilon: 0.786
Resetting the environment...
St



Step: 11627 | Episode Reward: 84.266 | Epsilon: 0.771
Resetting the environment...
Step: 11838 | Episode Reward: 143.445 | Epsilon: 0.767
Resetting the environment...
Step: 12063 | Episode Reward: 119.730 | Epsilon: 0.763
Resetting the environment...




Step: 12302 | Episode Reward: 87.354 | Epsilon: 0.759
Resetting the environment...
Step: 12506 | Episode Reward: 129.967 | Epsilon: 0.755
Resetting the environment...
Step: 12746 | Episode Reward: 90.391 | Epsilon: 0.751
Resetting the environment...
Step: 12974 | Episode Reward: 103.834 | Epsilon: 0.748
Resetting the environment...
Step: 13195 | Episode Reward: 116.856 | Epsilon: 0.744
Resetting the environment...
Step: 13407 | Episode Reward: 129.421 | Epsilon: 0.740
Resetting the environment...
Step: 13625 | Episode Reward: 111.985 | Epsilon: 0.737
Resetting the environment...
Step: 13836 | Episode Reward: 112.299 | Epsilon: 0.733
Resetting the environment...




Step: 14060 | Episode Reward: 81.338 | Epsilon: 0.729
Resetting the environment...
Step: 14288 | Episode Reward: 89.358 | Epsilon: 0.726
Resetting the environment...
Step: 14510 | Episode Reward: 98.165 | Epsilon: 0.722
Resetting the environment...
Step: 14720 | Episode Reward: 126.217 | Epsilon: 0.718
Resetting the environment...
Step: 14939 | Episode Reward: 134.709 | Epsilon: 0.715
Resetting the environment...
Step: 15149 | Episode Reward: 116.718 | Epsilon: 0.711
Resetting the environment...
Step: 15364 | Episode Reward: 121.008 | Epsilon: 0.708
Resetting the environment...
Step: 15598 | Episode Reward: 92.348 | Epsilon: 0.704
Resetting the environment...
Step: 15821 | Episode Reward: 140.257 | Epsilon: 0.701
Resetting the environment...
Step: 16035 | Episode Reward: 120.483 | Epsilon: 0.697
Resetting the environment...
Step: 16256 | Episode Reward: 109.949 | Epsilon: 0.694
Resetting the environment...
Step: 16478 | Episode Reward: 128.776 | Epsilon: 0.690
Resetting the environment



Step: 23181 | Episode Reward: 106.392 | Epsilon: 0.594
Resetting the environment...




Step: 23431 | Episode Reward: 88.555 | Epsilon: 0.591
Resetting the environment...
Step: 23676 | Episode Reward: 116.560 | Epsilon: 0.588
Resetting the environment...
Step: 23896 | Episode Reward: 127.689 | Epsilon: 0.585
Resetting the environment...
Step: 24112 | Episode Reward: 135.399 | Epsilon: 0.582
Resetting the environment...
Step: 24330 | Episode Reward: 115.116 | Epsilon: 0.579
Resetting the environment...
Step: 24537 | Episode Reward: 102.904 | Epsilon: 0.576
Resetting the environment...
Step: 24757 | Episode Reward: 133.279 | Epsilon: 0.573
Resetting the environment...
Step: 24997 | Episode Reward: 102.673 | Epsilon: 0.570
Resetting the environment...
Step: 25210 | Episode Reward: 124.373 | Epsilon: 0.568
Resetting the environment...
Step: 25424 | Episode Reward: 120.418 | Epsilon: 0.565
Resetting the environment...
Step: 25636 | Episode Reward: 89.262 | Epsilon: 0.562
Resetting the environment...
Step: 25839 | Episode Reward: 128.133 | Epsilon: 0.559
Resetting the environme



Step: 40939 | Episode Reward: 146.629 | Epsilon: 0.402
Resetting the environment...
Step: 41125 | Episode Reward: 113.324 | Epsilon: 0.400
Resetting the environment...
Step: 41333 | Episode Reward: 138.949 | Epsilon: 0.398
Resetting the environment...
Step: 41549 | Episode Reward: 136.624 | Epsilon: 0.396
Resetting the environment...
Step: 41757 | Episode Reward: 139.413 | Epsilon: 0.394
Resetting the environment...
Step: 41981 | Episode Reward: 134.593 | Epsilon: 0.392
Resetting the environment...
Step: 42219 | Episode Reward: 124.322 | Epsilon: 0.390
Resetting the environment...
Step: 42449 | Episode Reward: 132.096 | Epsilon: 0.388
Resetting the environment...
Step: 42674 | Episode Reward: 141.993 | Epsilon: 0.386
Resetting the environment...
Step: 42902 | Episode Reward: 125.780 | Epsilon: 0.384
Resetting the environment...
Step: 43123 | Episode Reward: 135.741 | Epsilon: 0.382
Resetting the environment...
Step: 43326 | Episode Reward: 122.187 | Epsilon: 0.380
Resetting the environ



Step: 43592 | Episode Reward: 83.433 | Epsilon: 0.378
Resetting the environment...
Step: 43823 | Episode Reward: 123.909 | Epsilon: 0.376
Resetting the environment...
Step: 44043 | Episode Reward: 129.404 | Epsilon: 0.374
Resetting the environment...
Step: 44255 | Episode Reward: 139.870 | Epsilon: 0.373
Resetting the environment...
Step: 44459 | Episode Reward: 137.421 | Epsilon: 0.371
Resetting the environment...
Step: 44666 | Episode Reward: 136.635 | Epsilon: 0.369
Resetting the environment...
Step: 44873 | Episode Reward: 146.481 | Epsilon: 0.367
Resetting the environment...




Step: 45112 | Episode Reward: 126.621 | Epsilon: 0.365
Resetting the environment...
Step: 45319 | Episode Reward: 141.518 | Epsilon: 0.363
Resetting the environment...
Step: 45521 | Episode Reward: 141.658 | Epsilon: 0.361
Resetting the environment...
Step: 45705 | Episode Reward: 126.835 | Epsilon: 0.360
Resetting the environment...
Step: 45906 | Episode Reward: 137.167 | Epsilon: 0.358
Resetting the environment...
Step: 46115 | Episode Reward: 128.172 | Epsilon: 0.356
Resetting the environment...
Step: 46381 | Episode Reward: 111.947 | Epsilon: 0.354
Resetting the environment...
Step: 46605 | Episode Reward: 105.232 | Epsilon: 0.353
Resetting the environment...
Step: 46798 | Episode Reward: 146.401 | Epsilon: 0.351
Resetting the environment...
Step: 47010 | Episode Reward: 131.366 | Epsilon: 0.349
Resetting the environment...
Step: 47216 | Episode Reward: 139.579 | Epsilon: 0.347
Resetting the environment...
Step: 47423 | Episode Reward: 141.386 | Epsilon: 0.346
Resetting the environ



Step: 171336 | Episode Reward: 148.955 | Epsilon: 0.050
Resetting the environment...
Step: 171526 | Episode Reward: 146.979 | Epsilon: 0.050
Resetting the environment...
Step: 171717 | Episode Reward: 139.424 | Epsilon: 0.050
Resetting the environment...
Step: 171927 | Episode Reward: 135.407 | Epsilon: 0.050
Resetting the environment...
Step: 172123 | Episode Reward: 142.929 | Epsilon: 0.050
Resetting the environment...




Step: 172317 | Episode Reward: 94.121 | Epsilon: 0.050
Resetting the environment...
Step: 172531 | Episode Reward: 126.428 | Epsilon: 0.050
Resetting the environment...
Step: 172726 | Episode Reward: 128.078 | Epsilon: 0.050
Resetting the environment...
Step: 172921 | Episode Reward: 68.885 | Epsilon: 0.050
Resetting the environment...
Step: 173130 | Episode Reward: 123.765 | Epsilon: 0.050
Resetting the environment...
Step: 173322 | Episode Reward: 134.518 | Epsilon: 0.050
Resetting the environment...
Step: 173591 | Episode Reward: 112.389 | Epsilon: 0.050
Resetting the environment...
Step: 173844 | Episode Reward: 126.351 | Epsilon: 0.050
Resetting the environment...
Step: 174055 | Episode Reward: 126.402 | Epsilon: 0.050
Resetting the environment...
Step: 174254 | Episode Reward: 141.840 | Epsilon: 0.050
Resetting the environment...
Step: 174460 | Episode Reward: 112.452 | Epsilon: 0.050
Resetting the environment...
Step: 174673 | Episode Reward: 108.702 | Epsilon: 0.050
Resetting t



Step: 175045 | Episode Reward: 82.791 | Epsilon: 0.050
Resetting the environment...




Step: 175237 | Episode Reward: 96.848 | Epsilon: 0.050
Resetting the environment...
Step: 175447 | Episode Reward: 133.834 | Epsilon: 0.050
Resetting the environment...
Step: 175646 | Episode Reward: 123.237 | Epsilon: 0.050
Resetting the environment...
Step: 175859 | Episode Reward: 136.333 | Epsilon: 0.050
Resetting the environment...
Step: 176056 | Episode Reward: 146.509 | Epsilon: 0.050
Resetting the environment...
Step: 176254 | Episode Reward: 138.210 | Epsilon: 0.050
Resetting the environment...
Step: 176455 | Episode Reward: 141.399 | Epsilon: 0.050
Resetting the environment...
Step: 176670 | Episode Reward: 141.628 | Epsilon: 0.050
Resetting the environment...
Step: 176886 | Episode Reward: 136.281 | Epsilon: 0.050
Resetting the environment...
Step: 177151 | Episode Reward: 137.443 | Epsilon: 0.050
Resetting the environment...
Step: 177417 | Episode Reward: 133.666 | Epsilon: 0.050
Resetting the environment...
Step: 177674 | Episode Reward: 135.081 | Epsilon: 0.050
Resetting 



Step: 178562 | Episode Reward: 88.488 | Epsilon: 0.050
Resetting the environment...
Step: 178789 | Episode Reward: 109.403 | Epsilon: 0.050
Resetting the environment...
Step: 179029 | Episode Reward: 122.188 | Epsilon: 0.050
Resetting the environment...
Step: 179233 | Episode Reward: 113.087 | Epsilon: 0.050
Resetting the environment...
Step: 179458 | Episode Reward: 120.519 | Epsilon: 0.050
Resetting the environment...
Step: 179686 | Episode Reward: 110.196 | Epsilon: 0.050
Resetting the environment...
Step: 179875 | Episode Reward: 122.134 | Epsilon: 0.050
Resetting the environment...
Step: 180097 | Episode Reward: 123.761 | Epsilon: 0.050
Resetting the environment...
Step: 180300 | Episode Reward: 149.303 | Epsilon: 0.050
Resetting the environment...
Step: 180502 | Episode Reward: 139.692 | Epsilon: 0.050
Resetting the environment...
Step: 180696 | Episode Reward: 150.883 | Epsilon: 0.050
Resetting the environment...
Step: 180871 | Episode Reward: 152.603 | Epsilon: 0.050
Resetting 

In [6]:
# Double-DQN run on an environment built from the current base_params
# (SMA variant), using the same hyperparameters as the DQN run.
ddqn_sma_env = make_env()
ddqn_sma_model = Double_DQN_Implementation(
    tensorboard_log="./wds_custom_logs/",
    learning_rate=3e-4,
    env=ddqn_sma_env,
)

# Train for 300k timesteps, then store the agent under the name "ddqn-sma".
ddqn_sma_model.learn(total_timesteps=300000)
ddqn_sma_model.save("ddqn-sma")

Train for 300000 steps
Resetting the environment...


  gym.logger.warn(
  gym.logger.warn(


Step: 214 | Episode Reward: 129.726 | Epsilon: 1.000
Resetting the environment...
Step: 440 | Episode Reward: 125.934 | Epsilon: 0.995
Resetting the environment...
Step: 664 | Episode Reward: 127.972 | Epsilon: 0.990
Resetting the environment...
Step: 887 | Episode Reward: 129.798 | Epsilon: 0.985
Resetting the environment...
Step: 1106 | Episode Reward: 126.058 | Epsilon: 0.980
Resetting the environment...
Step: 1328 | Episode Reward: 123.496 | Epsilon: 0.975
Resetting the environment...
Step: 1557 | Episode Reward: 125.577 | Epsilon: 0.970
Resetting the environment...
Step: 1798 | Episode Reward: 120.419 | Epsilon: 0.966
Resetting the environment...
Step: 2040 | Episode Reward: 124.834 | Epsilon: 0.961
Resetting the environment...
Step: 2234 | Episode Reward: 107.373 | Epsilon: 0.956
Resetting the environment...
Step: 2456 | Episode Reward: 130.739 | Epsilon: 0.951
Resetting the environment...
Step: 2672 | Episode Reward: 121.893 | Epsilon: 0.946
Resetting the environment...
Step: 28



Step: 11960 | Episode Reward: 102.346 | Epsilon: 0.771
Resetting the environment...
Step: 12177 | Episode Reward: 139.863 | Epsilon: 0.767
Resetting the environment...
Step: 12392 | Episode Reward: 124.114 | Epsilon: 0.763
Resetting the environment...
Step: 12655 | Episode Reward: 100.567 | Epsilon: 0.759
Resetting the environment...
Step: 12869 | Episode Reward: 125.537 | Epsilon: 0.755
Resetting the environment...
Step: 13089 | Episode Reward: 84.960 | Epsilon: 0.751
Resetting the environment...
Step: 13319 | Episode Reward: 113.139 | Epsilon: 0.748
Resetting the environment...
Step: 13522 | Episode Reward: 119.702 | Epsilon: 0.744
Resetting the environment...
Step: 13732 | Episode Reward: 126.272 | Epsilon: 0.740
Resetting the environment...
Step: 13945 | Episode Reward: 102.872 | Epsilon: 0.737
Resetting the environment...
Step: 14157 | Episode Reward: 117.931 | Epsilon: 0.733
Resetting the environment...




Step: 14405 | Episode Reward: 104.267 | Epsilon: 0.729
Resetting the environment...
Step: 14634 | Episode Reward: 92.483 | Epsilon: 0.726
Resetting the environment...
Step: 14864 | Episode Reward: 99.740 | Epsilon: 0.722
Resetting the environment...
Step: 15065 | Episode Reward: 129.518 | Epsilon: 0.718
Resetting the environment...
Step: 15293 | Episode Reward: 135.093 | Epsilon: 0.715
Resetting the environment...
Step: 15495 | Episode Reward: 107.744 | Epsilon: 0.711
Resetting the environment...
Step: 15712 | Episode Reward: 122.595 | Epsilon: 0.708
Resetting the environment...
Step: 15939 | Episode Reward: 112.197 | Epsilon: 0.704
Resetting the environment...
Step: 16155 | Episode Reward: 130.145 | Epsilon: 0.701
Resetting the environment...
Step: 16390 | Episode Reward: 124.881 | Epsilon: 0.697
Resetting the environment...
Step: 16616 | Episode Reward: 121.889 | Epsilon: 0.694
Resetting the environment...
Step: 16840 | Episode Reward: 131.526 | Epsilon: 0.690
Resetting the environme



Step: 23250 | Episode Reward: 106.746 | Epsilon: 0.594
Resetting the environment...




Step: 23460 | Episode Reward: 81.062 | Epsilon: 0.591
Resetting the environment...
Step: 23673 | Episode Reward: 124.791 | Epsilon: 0.588
Resetting the environment...
Step: 23892 | Episode Reward: 129.439 | Epsilon: 0.585
Resetting the environment...
Step: 24098 | Episode Reward: 137.246 | Epsilon: 0.582
Resetting the environment...
Step: 24305 | Episode Reward: 120.950 | Epsilon: 0.579
Resetting the environment...
Step: 24518 | Episode Reward: 98.887 | Epsilon: 0.576
Resetting the environment...
Step: 24740 | Episode Reward: 130.833 | Epsilon: 0.573
Resetting the environment...
Step: 24966 | Episode Reward: 114.189 | Epsilon: 0.570
Resetting the environment...
Step: 25189 | Episode Reward: 121.149 | Epsilon: 0.568
Resetting the environment...
Step: 25425 | Episode Reward: 117.117 | Epsilon: 0.565
Resetting the environment...
Step: 25652 | Episode Reward: 92.233 | Epsilon: 0.562
Resetting the environment...
Step: 25855 | Episode Reward: 128.938 | Epsilon: 0.559
Resetting the environmen



Step: 26078 | Episode Reward: 87.341 | Epsilon: 0.556
Resetting the environment...
Step: 26287 | Episode Reward: 129.059 | Epsilon: 0.554
Resetting the environment...
Step: 26501 | Episode Reward: 104.548 | Epsilon: 0.551
Resetting the environment...
Step: 26714 | Episode Reward: 129.100 | Epsilon: 0.548
Resetting the environment...
Step: 26948 | Episode Reward: 89.285 | Epsilon: 0.545
Resetting the environment...
Step: 27153 | Episode Reward: 139.522 | Epsilon: 0.543
Resetting the environment...
Step: 27359 | Episode Reward: 124.340 | Epsilon: 0.540
Resetting the environment...
Step: 27563 | Episode Reward: 133.967 | Epsilon: 0.537
Resetting the environment...
Step: 27773 | Episode Reward: 118.428 | Epsilon: 0.534
Resetting the environment...
Step: 27967 | Episode Reward: 134.351 | Epsilon: 0.532
Resetting the environment...
Step: 28168 | Episode Reward: 88.856 | Epsilon: 0.529
Resetting the environment...
Step: 28373 | Episode Reward: 83.697 | Epsilon: 0.526
Resetting the environment



Step: 30873 | Episode Reward: 100.713 | Epsilon: 0.496
Resetting the environment...
Step: 31086 | Episode Reward: 115.551 | Epsilon: 0.493
Resetting the environment...
Step: 31290 | Episode Reward: 131.098 | Epsilon: 0.491
Resetting the environment...
Step: 31493 | Episode Reward: 81.420 | Epsilon: 0.488
Resetting the environment...
Step: 31713 | Episode Reward: 95.892 | Epsilon: 0.486
Resetting the environment...
Step: 31925 | Episode Reward: 105.656 | Epsilon: 0.483
Resetting the environment...
Step: 32142 | Episode Reward: 114.058 | Epsilon: 0.481
Resetting the environment...
Step: 32354 | Episode Reward: 134.352 | Epsilon: 0.479
Resetting the environment...
Step: 32564 | Episode Reward: 127.630 | Epsilon: 0.476
Resetting the environment...
Step: 32770 | Episode Reward: 126.677 | Epsilon: 0.474
Resetting the environment...
Step: 33009 | Episode Reward: 119.990 | Epsilon: 0.471
Resetting the environment...
Step: 33219 | Episode Reward: 141.699 | Epsilon: 0.469
Resetting the environme



Step: 37442 | Episode Reward: 102.129 | Epsilon: 0.424
Resetting the environment...
Step: 37667 | Episode Reward: 121.629 | Epsilon: 0.422
Resetting the environment...




Step: 37890 | Episode Reward: 112.603 | Epsilon: 0.420
Resetting the environment...
Step: 38088 | Episode Reward: 84.643 | Epsilon: 0.418
Resetting the environment...
Step: 38291 | Episode Reward: 134.821 | Epsilon: 0.416
Resetting the environment...
Step: 38497 | Episode Reward: 125.502 | Epsilon: 0.414
Resetting the environment...
Step: 38698 | Episode Reward: 134.303 | Epsilon: 0.412
Resetting the environment...
Step: 38903 | Episode Reward: 117.121 | Epsilon: 0.410
Resetting the environment...
Step: 39102 | Episode Reward: 106.190 | Epsilon: 0.408
Resetting the environment...
Step: 39323 | Episode Reward: 116.216 | Epsilon: 0.406
Resetting the environment...
Step: 39533 | Episode Reward: 105.201 | Epsilon: 0.404
Resetting the environment...




Step: 39741 | Episode Reward: 88.789 | Epsilon: 0.402
Resetting the environment...
Step: 39935 | Episode Reward: 124.128 | Epsilon: 0.400
Resetting the environment...
Step: 40138 | Episode Reward: 146.904 | Epsilon: 0.398
Resetting the environment...




Step: 40340 | Episode Reward: 125.581 | Epsilon: 0.396
Resetting the environment...
Step: 40545 | Episode Reward: 135.770 | Epsilon: 0.394
Resetting the environment...
Step: 40752 | Episode Reward: 118.975 | Epsilon: 0.392
Resetting the environment...
Step: 40965 | Episode Reward: 119.917 | Epsilon: 0.390
Resetting the environment...
Step: 41179 | Episode Reward: 124.144 | Epsilon: 0.388
Resetting the environment...
Step: 41388 | Episode Reward: 143.437 | Epsilon: 0.386
Resetting the environment...
Step: 41593 | Episode Reward: 121.804 | Epsilon: 0.384
Resetting the environment...
Step: 41792 | Episode Reward: 139.056 | Epsilon: 0.382
Resetting the environment...
Step: 41990 | Episode Reward: 119.155 | Epsilon: 0.380
Resetting the environment...




Step: 42186 | Episode Reward: 72.067 | Epsilon: 0.378
Resetting the environment...
Step: 42416 | Episode Reward: 124.816 | Epsilon: 0.376
Resetting the environment...
Step: 42623 | Episode Reward: 128.787 | Epsilon: 0.374
Resetting the environment...
Step: 42839 | Episode Reward: 129.414 | Epsilon: 0.373
Resetting the environment...
Step: 43048 | Episode Reward: 124.012 | Epsilon: 0.371
Resetting the environment...
Step: 43250 | Episode Reward: 121.499 | Epsilon: 0.369
Resetting the environment...
Step: 43463 | Episode Reward: 110.527 | Epsilon: 0.367
Resetting the environment...




Step: 43669 | Episode Reward: 79.202 | Epsilon: 0.365
Resetting the environment...
Step: 43896 | Episode Reward: 118.846 | Epsilon: 0.363
Resetting the environment...
Step: 44109 | Episode Reward: 138.017 | Epsilon: 0.361
Resetting the environment...
Step: 44295 | Episode Reward: 128.321 | Epsilon: 0.360
Resetting the environment...
Step: 44493 | Episode Reward: 118.231 | Epsilon: 0.358
Resetting the environment...
Step: 44708 | Episode Reward: 95.840 | Epsilon: 0.356
Resetting the environment...
Step: 44942 | Episode Reward: 133.669 | Epsilon: 0.354
Resetting the environment...
Step: 45155 | Episode Reward: 101.736 | Epsilon: 0.353
Resetting the environment...
Step: 45343 | Episode Reward: 149.086 | Epsilon: 0.351
Resetting the environment...
Step: 45551 | Episode Reward: 130.379 | Epsilon: 0.349
Resetting the environment...
Step: 45765 | Episode Reward: 128.220 | Epsilon: 0.347
Resetting the environment...
Step: 45984 | Episode Reward: 130.808 | Epsilon: 0.346
Resetting the environme



Step: 46587 | Episode Reward: 77.736 | Epsilon: 0.340
Resetting the environment...




Step: 46803 | Episode Reward: 110.457 | Epsilon: 0.339
Resetting the environment...
Step: 47002 | Episode Reward: 82.651 | Epsilon: 0.337
Resetting the environment...
Step: 47197 | Episode Reward: 83.441 | Epsilon: 0.335
Resetting the environment...
Step: 47405 | Episode Reward: 112.276 | Epsilon: 0.334
Resetting the environment...
Step: 47613 | Episode Reward: 71.719 | Epsilon: 0.332
Resetting the environment...
Step: 47811 | Episode Reward: 127.701 | Epsilon: 0.330
Resetting the environment...
Step: 48027 | Episode Reward: 139.678 | Epsilon: 0.329
Resetting the environment...
Step: 48235 | Episode Reward: 124.512 | Epsilon: 0.327
Resetting the environment...
Step: 48457 | Episode Reward: 117.344 | Epsilon: 0.325
Resetting the environment...
Step: 48663 | Episode Reward: 126.223 | Epsilon: 0.324
Resetting the environment...
Step: 48855 | Episode Reward: 82.735 | Epsilon: 0.322
Resetting the environment...
Step: 49064 | Episode Reward: 131.685 | Epsilon: 0.321
Resetting the environment



Step: 55660 | Episode Reward: 110.374 | Epsilon: 0.273
Resetting the environment...
Step: 55857 | Episode Reward: 80.959 | Epsilon: 0.272
Resetting the environment...
Step: 56058 | Episode Reward: 124.775 | Epsilon: 0.270
Resetting the environment...
Step: 56255 | Episode Reward: 135.168 | Epsilon: 0.269
Resetting the environment...
Step: 56461 | Episode Reward: 136.543 | Epsilon: 0.268
Resetting the environment...
Step: 56655 | Episode Reward: 110.206 | Epsilon: 0.266
Resetting the environment...
Step: 56855 | Episode Reward: 107.902 | Epsilon: 0.265
Resetting the environment...
Step: 57054 | Episode Reward: 125.990 | Epsilon: 0.264
Resetting the environment...
Step: 57239 | Episode Reward: 87.606 | Epsilon: 0.262
Resetting the environment...
Step: 57447 | Episode Reward: 125.397 | Epsilon: 0.261
Resetting the environment...
Step: 57655 | Episode Reward: 116.154 | Epsilon: 0.260
Resetting the environment...
Step: 57862 | Episode Reward: 119.886 | Epsilon: 0.258
Resetting the environme



Step: 59116 | Episode Reward: 73.159 | Epsilon: 0.251
Resetting the environment...
Step: 59316 | Episode Reward: 83.854 | Epsilon: 0.249
Resetting the environment...
Step: 59514 | Episode Reward: 112.674 | Epsilon: 0.248
Resetting the environment...
Step: 59718 | Episode Reward: 92.714 | Epsilon: 0.247
Resetting the environment...
Step: 59925 | Episode Reward: 114.848 | Epsilon: 0.246
Resetting the environment...
Step: 60120 | Episode Reward: 134.704 | Epsilon: 0.245
Resetting the environment...
Step: 60310 | Episode Reward: 137.241 | Epsilon: 0.243
Resetting the environment...
Step: 60504 | Episode Reward: 146.386 | Epsilon: 0.242
Resetting the environment...
Step: 60691 | Episode Reward: 144.640 | Epsilon: 0.241
Resetting the environment...
Step: 60887 | Episode Reward: 140.419 | Epsilon: 0.240
Resetting the environment...
Step: 61090 | Episode Reward: 135.876 | Epsilon: 0.238
Resetting the environment...
Step: 61286 | Episode Reward: 137.013 | Epsilon: 0.237
Resetting the environmen



Step: 173844 | Episode Reward: 75.483 | Epsilon: 0.050
Resetting the environment...




Step: 174020 | Episode Reward: 69.204 | Epsilon: 0.050
Resetting the environment...
Step: 174215 | Episode Reward: 109.266 | Epsilon: 0.050
Resetting the environment...
Step: 174411 | Episode Reward: 105.094 | Epsilon: 0.050
Resetting the environment...
Step: 174608 | Episode Reward: 111.019 | Epsilon: 0.050
Resetting the environment...
Step: 174807 | Episode Reward: 104.376 | Epsilon: 0.050
Resetting the environment...
Step: 175000 | Episode Reward: 97.444 | Epsilon: 0.050
Resetting the environment...
Step: 175208 | Episode Reward: 125.583 | Epsilon: 0.050
Resetting the environment...
Step: 175406 | Episode Reward: 111.554 | Epsilon: 0.050
Resetting the environment...
Step: 175602 | Episode Reward: 97.300 | Epsilon: 0.050
Resetting the environment...
Step: 175793 | Episode Reward: 143.195 | Epsilon: 0.050
Resetting the environment...
Step: 175981 | Episode Reward: 135.668 | Epsilon: 0.050
Resetting the environment...
Step: 176168 | Episode Reward: 140.829 | Epsilon: 0.050
Resetting th



Step: 191554 | Episode Reward: 109.294 | Epsilon: 0.050
Resetting the environment...
Step: 191737 | Episode Reward: 86.758 | Epsilon: 0.050
Resetting the environment...
Step: 191935 | Episode Reward: 136.524 | Epsilon: 0.050
Resetting the environment...




Step: 192125 | Episode Reward: 116.344 | Epsilon: 0.050
Resetting the environment...
Step: 192302 | Episode Reward: 65.975 | Epsilon: 0.050
Resetting the environment...
Step: 192482 | Episode Reward: 11.852 | Epsilon: 0.050
Resetting the environment...
Step: 192658 | Episode Reward: 8.768 | Epsilon: 0.050
Resetting the environment...
Step: 192840 | Episode Reward: 28.342 | Epsilon: 0.050
Resetting the environment...
Step: 193025 | Episode Reward: 53.800 | Epsilon: 0.050
Resetting the environment...
Step: 193208 | Episode Reward: 19.655 | Epsilon: 0.050
Resetting the environment...
Step: 193386 | Episode Reward: 29.018 | Epsilon: 0.050
Resetting the environment...
Step: 193564 | Episode Reward: 11.387 | Epsilon: 0.050
Resetting the environment...
Step: 193743 | Episode Reward: 10.278 | Epsilon: 0.050
Resetting the environment...
Step: 193931 | Episode Reward: 48.694 | Epsilon: 0.050
Resetting the environment...
Step: 194107 | Episode Reward: 11.986 | Epsilon: 0.050
Resetting the environ

In [3]:
# Configure the shared settings for the EMA experiments:
# moving-average demand (SMA) off, exponential moving-average demand (EMA) on.
base_params.update({
    'demand_moving_average': False,
    'demand_exp_moving_average': True,
})

In [8]:
# DQN run on an environment built from the current base_params (EMA variant).
dqn_ema_env = make_env()
dqn_ema_model = DQN_Implementation(
    tensorboard_log="./wds_custom_logs/",
    learning_rate=3e-4,
    env=dqn_ema_env,
)

# Train for 300k timesteps, then store the agent under the name "dqn-ema".
dqn_ema_model.learn(total_timesteps=300000)
dqn_ema_model.save("dqn-ema")

Train for 300000 steps
Resetting the environment...


  gym.logger.warn(
  gym.logger.warn(


Step: 217 | Episode Reward: 131.717 | Epsilon: 1.000
Resetting the environment...
Step: 448 | Episode Reward: 127.818 | Epsilon: 0.995
Resetting the environment...
Step: 657 | Episode Reward: 129.814 | Epsilon: 0.990
Resetting the environment...
Step: 867 | Episode Reward: 133.418 | Epsilon: 0.985
Resetting the environment...
Step: 1098 | Episode Reward: 131.665 | Epsilon: 0.980
Resetting the environment...
Step: 1305 | Episode Reward: 128.559 | Epsilon: 0.975
Resetting the environment...
Step: 1533 | Episode Reward: 127.689 | Epsilon: 0.970
Resetting the environment...
Step: 1771 | Episode Reward: 117.240 | Epsilon: 0.966
Resetting the environment...
Step: 1999 | Episode Reward: 125.991 | Epsilon: 0.961
Resetting the environment...
Step: 2195 | Episode Reward: 111.656 | Epsilon: 0.956
Resetting the environment...
Step: 2420 | Episode Reward: 124.846 | Epsilon: 0.951
Resetting the environment...




Step: 2632 | Episode Reward: 114.305 | Epsilon: 0.946
Resetting the environment...
Step: 2849 | Episode Reward: 129.737 | Epsilon: 0.942
Resetting the environment...
Step: 3072 | Episode Reward: 127.711 | Epsilon: 0.937
Resetting the environment...
Step: 3274 | Episode Reward: 121.825 | Epsilon: 0.932
Resetting the environment...
Step: 3512 | Episode Reward: 100.037 | Epsilon: 0.928
Resetting the environment...
Step: 3725 | Episode Reward: 119.185 | Epsilon: 0.923
Resetting the environment...
Step: 3936 | Episode Reward: 131.922 | Epsilon: 0.918
Resetting the environment...
Step: 4159 | Episode Reward: 136.510 | Epsilon: 0.914
Resetting the environment...
Step: 4383 | Episode Reward: 121.571 | Epsilon: 0.909
Resetting the environment...
Step: 4605 | Episode Reward: 123.650 | Epsilon: 0.905
Resetting the environment...
Step: 4811 | Episode Reward: 132.658 | Epsilon: 0.900
Resetting the environment...
Step: 5037 | Episode Reward: 99.544 | Epsilon: 0.896
Resetting the environment...
Step:



Step: 11722 | Episode Reward: 83.484 | Epsilon: 0.771
Resetting the environment...
Step: 11940 | Episode Reward: 140.043 | Epsilon: 0.767
Resetting the environment...
Step: 12170 | Episode Reward: 128.945 | Epsilon: 0.763
Resetting the environment...
Step: 12401 | Episode Reward: 92.291 | Epsilon: 0.759
Resetting the environment...
Step: 12614 | Episode Reward: 128.950 | Epsilon: 0.755
Resetting the environment...
Step: 12850 | Episode Reward: 88.100 | Epsilon: 0.751
Resetting the environment...
Step: 13069 | Episode Reward: 107.786 | Epsilon: 0.748
Resetting the environment...
Step: 13284 | Episode Reward: 123.089 | Epsilon: 0.744
Resetting the environment...
Step: 13487 | Episode Reward: 129.261 | Epsilon: 0.740
Resetting the environment...
Step: 13697 | Episode Reward: 114.956 | Epsilon: 0.737
Resetting the environment...
Step: 13903 | Episode Reward: 114.892 | Epsilon: 0.733
Resetting the environment...




Step: 14142 | Episode Reward: 86.077 | Epsilon: 0.729
Resetting the environment...
Step: 14367 | Episode Reward: 108.896 | Epsilon: 0.726
Resetting the environment...
Step: 14605 | Episode Reward: 112.296 | Epsilon: 0.722
Resetting the environment...
Step: 14822 | Episode Reward: 131.146 | Epsilon: 0.718
Resetting the environment...
Step: 15034 | Episode Reward: 131.418 | Epsilon: 0.715
Resetting the environment...
Step: 15244 | Episode Reward: 125.331 | Epsilon: 0.711
Resetting the environment...
Step: 15480 | Episode Reward: 125.005 | Epsilon: 0.708
Resetting the environment...
Step: 15717 | Episode Reward: 106.588 | Epsilon: 0.704
Resetting the environment...
Step: 15943 | Episode Reward: 142.806 | Epsilon: 0.701
Resetting the environment...
Step: 16162 | Episode Reward: 121.217 | Epsilon: 0.697
Resetting the environment...
Step: 16373 | Episode Reward: 116.811 | Epsilon: 0.694
Resetting the environment...
Step: 16591 | Episode Reward: 127.339 | Epsilon: 0.690
Resetting the environm



Step: 25875 | Episode Reward: 124.475 | Epsilon: 0.556
Resetting the environment...
Step: 26081 | Episode Reward: 134.929 | Epsilon: 0.554
Resetting the environment...
Step: 26286 | Episode Reward: 139.828 | Epsilon: 0.551
Resetting the environment...
Step: 26501 | Episode Reward: 136.465 | Epsilon: 0.548
Resetting the environment...
Step: 26765 | Episode Reward: 126.283 | Epsilon: 0.545
Resetting the environment...
Step: 26981 | Episode Reward: 142.716 | Epsilon: 0.543
Resetting the environment...
Step: 27187 | Episode Reward: 129.552 | Epsilon: 0.540
Resetting the environment...
Step: 27388 | Episode Reward: 140.133 | Epsilon: 0.537
Resetting the environment...
Step: 27590 | Episode Reward: 133.232 | Epsilon: 0.534
Resetting the environment...
Step: 27785 | Episode Reward: 144.036 | Epsilon: 0.532
Resetting the environment...
Step: 27972 | Episode Reward: 143.334 | Epsilon: 0.529
Resetting the environment...
Step: 28243 | Episode Reward: 137.087 | Epsilon: 0.526
Resetting the environ



Step: 37144 | Episode Reward: 134.867 | Epsilon: 0.424
Resetting the environment...
Step: 37346 | Episode Reward: 137.937 | Epsilon: 0.422
Resetting the environment...
Step: 37557 | Episode Reward: 131.962 | Epsilon: 0.420
Resetting the environment...
Step: 37754 | Episode Reward: 139.312 | Epsilon: 0.418
Resetting the environment...
Step: 37957 | Episode Reward: 136.454 | Epsilon: 0.416
Resetting the environment...
Step: 38157 | Episode Reward: 132.672 | Epsilon: 0.414
Resetting the environment...
Step: 38361 | Episode Reward: 140.324 | Epsilon: 0.412
Resetting the environment...
Step: 38561 | Episode Reward: 145.030 | Epsilon: 0.410
Resetting the environment...
Step: 38750 | Episode Reward: 141.518 | Epsilon: 0.408
Resetting the environment...
Step: 38962 | Episode Reward: 143.444 | Epsilon: 0.406
Resetting the environment...
Step: 39165 | Episode Reward: 138.512 | Epsilon: 0.404
Resetting the environment...
Step: 39386 | Episode Reward: 150.863 | Epsilon: 0.402
Resetting the environ



Step: 41790 | Episode Reward: 130.802 | Epsilon: 0.378
Resetting the environment...
Step: 41977 | Episode Reward: 140.708 | Epsilon: 0.376
Resetting the environment...
Step: 42185 | Episode Reward: 132.836 | Epsilon: 0.374
Resetting the environment...
Step: 42381 | Episode Reward: 137.885 | Epsilon: 0.373
Resetting the environment...
Step: 42577 | Episode Reward: 139.131 | Epsilon: 0.371
Resetting the environment...
Step: 42779 | Episode Reward: 137.386 | Epsilon: 0.369
Resetting the environment...
Step: 42973 | Episode Reward: 148.047 | Epsilon: 0.367
Resetting the environment...
Step: 43201 | Episode Reward: 122.056 | Epsilon: 0.365
Resetting the environment...
Step: 43401 | Episode Reward: 144.167 | Epsilon: 0.363
Resetting the environment...
Step: 43602 | Episode Reward: 144.451 | Epsilon: 0.361
Resetting the environment...
Step: 43794 | Episode Reward: 132.611 | Epsilon: 0.360
Resetting the environment...
Step: 43986 | Episode Reward: 137.593 | Epsilon: 0.358
Resetting the environ



Step: 45971 | Episode Reward: 141.140 | Epsilon: 0.340
Resetting the environment...
Step: 46174 | Episode Reward: 129.199 | Epsilon: 0.339
Resetting the environment...
Step: 46355 | Episode Reward: 150.132 | Epsilon: 0.337
Resetting the environment...
Step: 46564 | Episode Reward: 143.881 | Epsilon: 0.335
Resetting the environment...
Step: 46754 | Episode Reward: 142.996 | Epsilon: 0.334
Resetting the environment...
Step: 46951 | Episode Reward: 142.671 | Epsilon: 0.332
Resetting the environment...
Step: 47154 | Episode Reward: 141.182 | Epsilon: 0.330
Resetting the environment...
Step: 47353 | Episode Reward: 112.929 | Epsilon: 0.329
Resetting the environment...
Step: 47552 | Episode Reward: 128.018 | Epsilon: 0.327
Resetting the environment...
Step: 47756 | Episode Reward: 110.394 | Epsilon: 0.325
Resetting the environment...
Step: 47955 | Episode Reward: 125.698 | Epsilon: 0.324
Resetting the environment...
Step: 48151 | Episode Reward: 136.851 | Epsilon: 0.322
Resetting the environ



Step: 60164 | Episode Reward: 147.039 | Epsilon: 0.235
Resetting the environment...
Step: 60348 | Episode Reward: 144.513 | Epsilon: 0.234
Resetting the environment...
Step: 60534 | Episode Reward: 139.805 | Epsilon: 0.233
Resetting the environment...
Step: 60717 | Episode Reward: 146.038 | Epsilon: 0.231
Resetting the environment...
Step: 60906 | Episode Reward: 154.809 | Epsilon: 0.230
Resetting the environment...
Step: 61094 | Episode Reward: 151.583 | Epsilon: 0.229
Resetting the environment...
Step: 61277 | Episode Reward: 146.849 | Epsilon: 0.228
Resetting the environment...
Step: 61470 | Episode Reward: 150.518 | Epsilon: 0.227
Resetting the environment...
Step: 61653 | Episode Reward: 146.833 | Epsilon: 0.226
Resetting the environment...
Step: 61844 | Episode Reward: 143.583 | Epsilon: 0.225
Resetting the environment...
Step: 62040 | Episode Reward: 153.982 | Epsilon: 0.223
Resetting the environment...
Step: 62224 | Episode Reward: 143.501 | Epsilon: 0.222
Resetting the environ

In [5]:
# Train a Double DQN agent for the run labelled "ema" and save the result.
#
# make_env() (defined near the top of the notebook) builds 'gym4real/wds-v0'
# from the module-level `base_params` dict and wraps it in RewardScalingWrapper,
# so this run uses whatever `base_params` holds when the cell executes.
#
# NOTE(review): the only configuration cell visible at the top of the notebook
# sets base_params['demand_moving_average'] = True and
# base_params['demand_exp_moving_average'] = False (i.e. SMA). Confirm that an
# earlier cell switched these flags to EMA before this cell ran; otherwise the
# "ema" label on this environment/model/checkpoint is misleading.
# NOTE(review): this cell shares execution count In [5] with the DQN/SMA
# training cell, which suggests out-of-order execution -- verify the notebook
# reproduces under Restart Kernel -> Run All.
ddqn_ema_env = make_env()

# Same learning rate and TensorBoard log directory as the DQN/SMA run above.
# NOTE(review): both runs log under "./wds_custom_logs/"; presumably the
# implementation separates runs into sub-directories -- TODO confirm.
ddqn_ema_model = Double_DQN_Implementation(
    env=ddqn_ema_env, 
    learning_rate=3e-4, 
    tensorboard_log="./wds_custom_logs/")

# Long-running step: trains for 300000 environment steps and prints one
# "Step | Episode Reward | Epsilon" line per episode (see the output below).
ddqn_ema_model.learn(total_timesteps=300000)

# Persist the trained agent under the name "ddqn-ema"; the on-disk format and
# file extension are decided by Double_DQN_Implementation.save -- TODO confirm.
ddqn_ema_model.save("ddqn-ema")

  gym.logger.warn(
  gym.logger.warn(


Train for 300000 steps
Resetting the environment...
Step: 215 | Episode Reward: 127.184 | Epsilon: 1.000
Resetting the environment...
Step: 440 | Episode Reward: 124.237 | Epsilon: 0.995
Resetting the environment...
Step: 654 | Episode Reward: 120.241 | Epsilon: 0.990
Resetting the environment...
Step: 872 | Episode Reward: 131.836 | Epsilon: 0.985
Resetting the environment...
Step: 1098 | Episode Reward: 129.391 | Epsilon: 0.980
Resetting the environment...
Step: 1324 | Episode Reward: 126.056 | Epsilon: 0.975
Resetting the environment...
Step: 1558 | Episode Reward: 130.757 | Epsilon: 0.970
Resetting the environment...
Step: 1784 | Episode Reward: 116.313 | Epsilon: 0.966
Resetting the environment...
Step: 2008 | Episode Reward: 124.782 | Epsilon: 0.961
Resetting the environment...
Step: 2203 | Episode Reward: 119.407 | Epsilon: 0.956
Resetting the environment...
Step: 2432 | Episode Reward: 126.404 | Epsilon: 0.951
Resetting the environment...
Step: 2660 | Episode Reward: 120.692 | 



Step: 8488 | Episode Reward: 118.670 | Epsilon: 0.831
Resetting the environment...
Step: 8701 | Episode Reward: 131.919 | Epsilon: 0.827
Resetting the environment...
Step: 8915 | Episode Reward: 132.803 | Epsilon: 0.822
Resetting the environment...
Step: 9128 | Episode Reward: 105.491 | Epsilon: 0.818
Resetting the environment...
Step: 9339 | Episode Reward: 122.596 | Epsilon: 0.814
Resetting the environment...
Step: 9577 | Episode Reward: 84.224 | Epsilon: 0.810
Resetting the environment...
Step: 9793 | Episode Reward: 115.508 | Epsilon: 0.806
Resetting the environment...
Step: 10008 | Episode Reward: 124.986 | Epsilon: 0.802
Resetting the environment...
Step: 10230 | Episode Reward: 119.467 | Epsilon: 0.798
Resetting the environment...
Step: 10440 | Episode Reward: 127.117 | Epsilon: 0.794
Resetting the environment...
Step: 10650 | Episode Reward: 135.094 | Epsilon: 0.790
Resetting the environment...
Step: 10868 | Episode Reward: 123.536 | Epsilon: 0.786
Resetting the environment...




Step: 11802 | Episode Reward: 94.400 | Epsilon: 0.771
Resetting the environment...
Step: 12019 | Episode Reward: 137.980 | Epsilon: 0.767
Resetting the environment...
Step: 12237 | Episode Reward: 123.761 | Epsilon: 0.763
Resetting the environment...




Step: 12480 | Episode Reward: 92.699 | Epsilon: 0.759
Resetting the environment...
Step: 12687 | Episode Reward: 129.930 | Epsilon: 0.755
Resetting the environment...
Step: 12932 | Episode Reward: 94.385 | Epsilon: 0.751
Resetting the environment...
Step: 13167 | Episode Reward: 108.281 | Epsilon: 0.748
Resetting the environment...
Step: 13378 | Episode Reward: 111.243 | Epsilon: 0.744
Resetting the environment...
Step: 13598 | Episode Reward: 125.559 | Epsilon: 0.740
Resetting the environment...
Step: 13818 | Episode Reward: 110.083 | Epsilon: 0.737
Resetting the environment...
Step: 14017 | Episode Reward: 120.133 | Epsilon: 0.733
Resetting the environment...




Step: 14242 | Episode Reward: 83.239 | Epsilon: 0.729
Resetting the environment...
Step: 14464 | Episode Reward: 86.743 | Epsilon: 0.726
Resetting the environment...
Step: 14699 | Episode Reward: 90.978 | Epsilon: 0.722
Resetting the environment...
Step: 14903 | Episode Reward: 128.842 | Epsilon: 0.718
Resetting the environment...
Step: 15113 | Episode Reward: 132.518 | Epsilon: 0.715
Resetting the environment...
Step: 15320 | Episode Reward: 124.281 | Epsilon: 0.711
Resetting the environment...
Step: 15529 | Episode Reward: 122.752 | Epsilon: 0.708
Resetting the environment...
Step: 15764 | Episode Reward: 91.062 | Epsilon: 0.704
Resetting the environment...
Step: 15973 | Episode Reward: 144.673 | Epsilon: 0.701
Resetting the environment...
Step: 16183 | Episode Reward: 104.327 | Epsilon: 0.697
Resetting the environment...
Step: 16398 | Episode Reward: 106.533 | Epsilon: 0.694
Resetting the environment...
Step: 16617 | Episode Reward: 130.407 | Epsilon: 0.690
Resetting the environment



Step: 215312 | Episode Reward: 74.639 | Epsilon: 0.050
Resetting the environment...
Step: 215503 | Episode Reward: 112.012 | Epsilon: 0.050
Resetting the environment...
Step: 215697 | Episode Reward: 111.956 | Epsilon: 0.050
Resetting the environment...
Step: 215885 | Episode Reward: 37.628 | Epsilon: 0.050
Resetting the environment...
Step: 216076 | Episode Reward: 105.289 | Epsilon: 0.050
Resetting the environment...
Step: 216267 | Episode Reward: 112.424 | Epsilon: 0.050
Resetting the environment...
Step: 216459 | Episode Reward: 107.802 | Epsilon: 0.050
Resetting the environment...
Step: 216630 | Episode Reward: 81.027 | Epsilon: 0.050
Resetting the environment...
Step: 216826 | Episode Reward: 107.419 | Epsilon: 0.050
Resetting the environment...
Step: 217059 | Episode Reward: 137.114 | Epsilon: 0.050
Resetting the environment...
Step: 217248 | Episode Reward: 104.199 | Epsilon: 0.050
Resetting the environment...
Step: 217437 | Episode Reward: 149.699 | Epsilon: 0.050
Resetting th



Step: 224812 | Episode Reward: 31.006 | Epsilon: 0.050
Resetting the environment...
Step: 224994 | Episode Reward: 14.560 | Epsilon: 0.050
Resetting the environment...
Step: 225177 | Episode Reward: 20.773 | Epsilon: 0.050
Resetting the environment...
Step: 225362 | Episode Reward: 27.402 | Epsilon: 0.050
Resetting the environment...
Step: 225541 | Episode Reward: 9.450 | Epsilon: 0.050
Resetting the environment...
Step: 225732 | Episode Reward: 57.230 | Epsilon: 0.050
Resetting the environment...
Step: 225920 | Episode Reward: 53.276 | Epsilon: 0.050
Resetting the environment...
Step: 226097 | Episode Reward: 6.906 | Epsilon: 0.050
Resetting the environment...
Step: 226286 | Episode Reward: 65.597 | Epsilon: 0.050
Resetting the environment...
Step: 226465 | Episode Reward: 11.704 | Epsilon: 0.050
Resetting the environment...
Step: 226650 | Episode Reward: 54.860 | Epsilon: 0.050
Resetting the environment...
Step: 226836 | Episode Reward: 55.875 | Epsilon: 0.050
Resetting the environme



Step: 244245 | Episode Reward: 25.880 | Epsilon: 0.050
Resetting the environment...
Step: 244427 | Episode Reward: 16.496 | Epsilon: 0.050
Resetting the environment...
Step: 244610 | Episode Reward: 16.424 | Epsilon: 0.050
Resetting the environment...
Step: 244793 | Episode Reward: 52.832 | Epsilon: 0.050
Resetting the environment...
Step: 244972 | Episode Reward: 48.606 | Epsilon: 0.050
Resetting the environment...
Step: 245158 | Episode Reward: 48.302 | Epsilon: 0.050
Resetting the environment...
Step: 245342 | Episode Reward: 13.690 | Epsilon: 0.050
Resetting the environment...
Step: 245518 | Episode Reward: 10.203 | Epsilon: 0.050
Resetting the environment...
Step: 245703 | Episode Reward: 21.458 | Epsilon: 0.050
Resetting the environment...
Step: 245882 | Episode Reward: 9.248 | Epsilon: 0.050
Resetting the environment...
Step: 246078 | Episode Reward: 52.829 | Epsilon: 0.050
Resetting the environment...
Step: 246264 | Episode Reward: 18.433 | Epsilon: 0.050
Resetting the environm



Step: 257687 | Episode Reward: 59.115 | Epsilon: 0.050
Resetting the environment...
Step: 257879 | Episode Reward: 88.106 | Epsilon: 0.050
Resetting the environment...
Step: 258073 | Episode Reward: 133.329 | Epsilon: 0.050
Resetting the environment...
Step: 258329 | Episode Reward: 119.474 | Epsilon: 0.050
Resetting the environment...
Step: 258574 | Episode Reward: 115.773 | Epsilon: 0.050
Resetting the environment...
Step: 258767 | Episode Reward: 113.575 | Epsilon: 0.050
Resetting the environment...
Step: 258995 | Episode Reward: 121.231 | Epsilon: 0.050
Resetting the environment...
Step: 259188 | Episode Reward: 123.010 | Epsilon: 0.050
Resetting the environment...
Step: 259392 | Episode Reward: 119.957 | Epsilon: 0.050
Resetting the environment...
Step: 259629 | Episode Reward: 130.251 | Epsilon: 0.050
Resetting the environment...
Step: 259837 | Episode Reward: 135.209 | Epsilon: 0.050
Resetting the environment...
Step: 260057 | Episode Reward: 139.647 | Epsilon: 0.050
Resetting t



Step: 261411 | Episode Reward: 117.397 | Epsilon: 0.050
Resetting the environment...
Step: 261631 | Episode Reward: 94.896 | Epsilon: 0.050
Resetting the environment...
Step: 261896 | Episode Reward: 122.824 | Epsilon: 0.050
Resetting the environment...
Step: 262124 | Episode Reward: 118.619 | Epsilon: 0.050
Resetting the environment...
Step: 262365 | Episode Reward: 122.233 | Epsilon: 0.050
Resetting the environment...
Step: 262583 | Episode Reward: 143.521 | Epsilon: 0.050
Resetting the environment...
Step: 262796 | Episode Reward: 120.446 | Epsilon: 0.050
Resetting the environment...
Step: 262989 | Episode Reward: 84.079 | Epsilon: 0.050
Resetting the environment...
Step: 263202 | Episode Reward: 116.367 | Epsilon: 0.050
Resetting the environment...
Step: 263403 | Episode Reward: 108.066 | Epsilon: 0.050
Resetting the environment...
Step: 263604 | Episode Reward: 109.668 | Epsilon: 0.050
Resetting the environment...
Step: 263815 | Episode Reward: 122.636 | Epsilon: 0.050
Resetting t



Step: 291822 | Episode Reward: 126.580 | Epsilon: 0.050
Resetting the environment...
Step: 292019 | Episode Reward: 150.675 | Epsilon: 0.050
Resetting the environment...
Step: 292204 | Episode Reward: 151.052 | Epsilon: 0.050
Resetting the environment...
Step: 292395 | Episode Reward: 147.978 | Epsilon: 0.050
Resetting the environment...
Step: 292564 | Episode Reward: 153.509 | Epsilon: 0.050
Resetting the environment...
Step: 292740 | Episode Reward: 147.172 | Epsilon: 0.050
Resetting the environment...
Step: 292925 | Episode Reward: 146.373 | Epsilon: 0.050
Resetting the environment...
Step: 293110 | Episode Reward: 145.339 | Epsilon: 0.050
Resetting the environment...
Step: 293295 | Episode Reward: 148.011 | Epsilon: 0.050
Resetting the environment...
Step: 293475 | Episode Reward: 145.472 | Epsilon: 0.050
Resetting the environment...
Step: 293660 | Episode Reward: 138.761 | Epsilon: 0.050
Resetting the environment...
Step: 293844 | Episode Reward: 148.791 | Epsilon: 0.050
Resetting