### Import

In [1]:
import wandb
import gym
from src.agent import DQNAgent, Config

### Initialize the sweep

In [2]:
sweep_config = {
  "name" : "double-dqn-sweep",
  "method" : "random",
  "metric" :{
     "name": "reward",
     "goal": "maximize"
  },
  "parameters" : {
    "epochs" : {
      "value" : 900
    },
    "lr_decay" :{
      "min": 10,
      "max": 400
    },
    "gamma" :{
        "min": 0.01,
        "max": 0.99
    },
    "loss" :{
        "values": ["mse", "huber", "mae"]
    },
    "batch_size" :{
        "min": 10,
        "max": 512
    },
    "memory_size" :{
        "values": [512, 1025, 2048, 4096, 10000, 40000, 60000]
    },
    "target_update" :{
        "min": 1,
        "max": 50
    },
    "eps_decay" :{
        "min": 50,
        "max": 500
    }
  }
}
sweep_id = wandb.sweep(sweep_config)

Create sweep with ID: vhfjmlpf
Sweep URL: https://wandb.ai/hugoallegaert/uncategorized/sweeps/vhfjmlpf


### Initialize env

In [3]:
env_name = "Taxi-v3"
env = gym.make(env_name)
print('Observation:', env.observation_space.n, env.desc.shape[1], env.desc.shape[0])
print('Action:', env.action_space.n)

Observation: 500 11 7
Action: 6


### Run agent

In [4]:
def train():
    with wandb.init() as run:
        config = wandb.config
        ac = Config(target_update=config["target_update"], 
                lr=0.01, 
                lr_min=0.001, 
                lr_decay=config["lr_decay"], 
                gamma=config["gamma"],
                loss=config["loss"], 
                memory_size=config["memory_size"], 
                batch_size=config["batch_size"], 
                eps_start=1, 
                eps_min=0.01, 
                eps_decay=config["eps_decay"],
                learning_start=200,
                double_dqn=True)
        model = DQNAgent(env=env, config=ac)
        reward = model.fit(wandb_log=False, epochs=config["epochs"], save=False)  #  model training 
        wandb.log({"reward": reward})
        #wandb.log({"target_update": ac.target_update, "epochs": config["epochs"], "lr": ac.lr, "gamma": ac.gamma, "loss_type": ac.loss, "memory_size": ac.memory_size, "batch_size": ac.batch_size})

count = 50 # number of runs to execute
wandb.agent(sweep_id, function=train, count=count)

[34m[1mwandb[0m: Agent Starting Run: 5gj5qzfu with config:
[34m[1mwandb[0m: 	batch_size: 80
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 206
[34m[1mwandb[0m: 	gamma: 0.3992443702137618
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 222
[34m[1mwandb[0m: 	memory_size: 1025
[34m[1mwandb[0m: 	target_update: 13
[34m[1mwandb[0m: Currently logged in as: [33mhugoallegaert[0m (use `wandb login --relogin` to force relogin)


Training episode: 1/900
Episode 0	Average Score: -424.00
Training episode: 101/900
Episode 100	Average Score: -388.19
Training episode: 201/900
Episode 200	Average Score: -397.72
Training episode: 301/900
Episode 300	Average Score: -213.32
Training episode: 401/900
Episode 400	Average Score: -113.86
Training episode: 501/900
Episode 500	Average Score: -105.13
Training episode: 601/900
Episode 600	Average Score: -102.52
Training episode: 701/900
Episode 700	Average Score: -103.06
Training episode: 801/900
Episode 800	Average Score: -103.24
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.61


[34m[1mwandb[0m: Agent Starting Run: de6cqi0d with config:
[34m[1mwandb[0m: 	batch_size: 243
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 147
[34m[1mwandb[0m: 	gamma: 0.020937141060421008
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 190
[34m[1mwandb[0m: 	memory_size: 40000
[34m[1mwandb[0m: 	target_update: 31


Training episode: 1/900
Episode 0	Average Score: -496.00
Training episode: 101/900
Episode 100	Average Score: -380.38
Training episode: 201/900
Episode 200	Average Score: -389.96
Training episode: 301/900
Episode 300	Average Score: -180.93
Training episode: 401/900
Episode 400	Average Score: -103.99
Training episode: 501/900
Episode 500	Average Score: -100.56
Training episode: 601/900
Episode 600	Average Score: -100.75
Training episode: 701/900
Episode 700	Average Score: -100.05
Training episode: 801/900
Episode 800	Average Score: -99.61
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.25


[34m[1mwandb[0m: Agent Starting Run: b9sb7vaw with config:
[34m[1mwandb[0m: 	batch_size: 259
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 187
[34m[1mwandb[0m: 	gamma: 0.21843400200901347
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 254
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 28


Training episode: 1/900
Episode 0	Average Score: -451.00
Training episode: 101/900
Episode 100	Average Score: -384.78
Training episode: 201/900
Episode 200	Average Score: -388.61
Training episode: 301/900
Episode 300	Average Score: -210.34
Training episode: 401/900
Episode 400	Average Score: -110.53
Training episode: 501/900
Episode 500	Average Score: -103.33
Training episode: 601/900
Episode 600	Average Score: -103.15
Training episode: 701/900
Episode 700	Average Score: -102.52
Training episode: 801/900
Episode 800	Average Score: -103.33
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.52


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dx5lw38c with config:
[34m[1mwandb[0m: 	batch_size: 103
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 320
[34m[1mwandb[0m: 	gamma: 0.889210416912673
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 216
[34m[1mwandb[0m: 	memory_size: 1025
[34m[1mwandb[0m: 	target_update: 5


Training episode: 1/900
Episode 0	Average Score: -334.00
Training episode: 101/900
Episode 100	Average Score: -400.51
Training episode: 201/900
Episode 200	Average Score: -387.84
Training episode: 301/900
Episode 300	Average Score: -255.19
Training episode: 401/900
Episode 400	Average Score: -136.72
Training episode: 501/900
Episode 500	Average Score: -110.89
Training episode: 601/900
Episode 600	Average Score: -104.32
Training episode: 701/900
Episode 700	Average Score: -103.87
Training episode: 801/900
Episode 800	Average Score: -103.06
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.16


[34m[1mwandb[0m: Agent Starting Run: la80md25 with config:
[34m[1mwandb[0m: 	batch_size: 304
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 114
[34m[1mwandb[0m: 	gamma: 0.7671958863516961
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 179
[34m[1mwandb[0m: 	memory_size: 4096
[34m[1mwandb[0m: 	target_update: 36


Training episode: 1/900
Episode 0	Average Score: -316.00
Training episode: 101/900
Episode 100	Average Score: -392.32
Training episode: 201/900
Episode 200	Average Score: -390.35
Training episode: 301/900
Episode 300	Average Score: -168.22
Training episode: 401/900
Episode 400	Average Score: -103.06
Training episode: 501/900
Episode 500	Average Score: -102.88
Training episode: 601/900
Episode 600	Average Score: -103.24
Training episode: 701/900
Episode 700	Average Score: -102.52
Training episode: 801/900
Episode 800	Average Score: -102.88
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-106.84


[34m[1mwandb[0m: Agent Starting Run: f4u5gwwu with config:
[34m[1mwandb[0m: 	batch_size: 68
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 57
[34m[1mwandb[0m: 	gamma: 0.6493605658893583
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 174
[34m[1mwandb[0m: 	memory_size: 1025
[34m[1mwandb[0m: 	target_update: 46


Training episode: 1/900
Episode 0	Average Score: -424.00
Training episode: 101/900
Episode 100	Average Score: -414.25
Training episode: 201/900
Episode 200	Average Score: -415.10
Training episode: 301/900
Episode 300	Average Score: -136.72
Training episode: 401/900
Episode 400	Average Score: -102.07
Training episode: 501/900
Episode 500	Average Score: -102.70
Training episode: 601/900
Episode 600	Average Score: -102.79
Training episode: 701/900
Episode 700	Average Score: -103.51
Training episode: 801/900
Episode 800	Average Score: -102.52
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.25


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9x0wcrxc with config:
[34m[1mwandb[0m: 	batch_size: 396
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 142
[34m[1mwandb[0m: 	gamma: 0.8882826690841862
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 180
[34m[1mwandb[0m: 	memory_size: 1025
[34m[1mwandb[0m: 	target_update: 49


Training episode: 1/900
Episode 0	Average Score: -379.00
Training episode: 101/900
Episode 100	Average Score: -390.94
Training episode: 201/900
Episode 200	Average Score: -382.15
Training episode: 301/900
Episode 300	Average Score: -187.21
Training episode: 401/900
Episode 400	Average Score: -105.13
Training episode: 501/900
Episode 500	Average Score: -103.06
Training episode: 601/900
Episode 600	Average Score: -102.88
Training episode: 701/900
Episode 700	Average Score: -102.43
Training episode: 801/900
Episode 800	Average Score: -102.70
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-106.3


[34m[1mwandb[0m: Agent Starting Run: nveq4ytt with config:
[34m[1mwandb[0m: 	batch_size: 504
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 74
[34m[1mwandb[0m: 	gamma: 0.6788268981304163
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 400
[34m[1mwandb[0m: 	memory_size: 60000
[34m[1mwandb[0m: 	target_update: 42


Training episode: 1/900
Episode 0	Average Score: -343.00
Training episode: 101/900
Episode 100	Average Score: -384.99
Training episode: 201/900
Episode 200	Average Score: -374.99
Training episode: 301/900
Episode 300	Average Score: -143.92
Training episode: 401/900
Episode 400	Average Score: -102.52
Training episode: 501/900
Episode 500	Average Score: -102.18
Training episode: 601/900
Episode 600	Average Score: -90.72
Training episode: 701/900
Episode 700	Average Score: -77.37
Training episode: 801/900
Episode 800	Average Score: -61.96
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-62.44


[34m[1mwandb[0m: Agent Starting Run: 0sxrwvrk with config:
[34m[1mwandb[0m: 	batch_size: 10
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 282
[34m[1mwandb[0m: 	gamma: 0.8637555100103826
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 341
[34m[1mwandb[0m: 	memory_size: 10000
[34m[1mwandb[0m: 	target_update: 3


Training episode: 1/900
Episode 0	Average Score: -316.00
Training episode: 101/900
Episode 100	Average Score: -397.81
Training episode: 201/900
Episode 200	Average Score: -393.67
Training episode: 301/900
Episode 300	Average Score: -238.08
Training episode: 401/900
Episode 400	Average Score: -130.55
Training episode: 501/900
Episode 500	Average Score: -110.16
Training episode: 601/900
Episode 600	Average Score: -102.88
Training episode: 701/900
Episode 700	Average Score: -103.15
Training episode: 801/900
Episode 800	Average Score: -102.43
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.34


[34m[1mwandb[0m: Agent Starting Run: yjs0hb4i with config:
[34m[1mwandb[0m: 	batch_size: 488
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 349
[34m[1mwandb[0m: 	gamma: 0.9317551379842748
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 73
[34m[1mwandb[0m: 	memory_size: 4096
[34m[1mwandb[0m: 	target_update: 21


Training episode: 1/900
Episode 0	Average Score: -298.00
Training episode: 101/900
Episode 100	Average Score: -393.34
Training episode: 201/900
Episode 200	Average Score: -399.16
Training episode: 301/900
Episode 300	Average Score: -262.09
Training episode: 401/900
Episode 400	Average Score: -145.04
Training episode: 501/900
Episode 500	Average Score: -114.76
Training episode: 601/900
Episode 600	Average Score: -107.02
Training episode: 701/900
Episode 700	Average Score: -104.14
Training episode: 801/900
Episode 800	Average Score: -103.15
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-103.42


[34m[1mwandb[0m: Agent Starting Run: zh6520am with config:
[34m[1mwandb[0m: 	batch_size: 456
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 229
[34m[1mwandb[0m: 	gamma: 0.4211983330381009
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 229
[34m[1mwandb[0m: 	memory_size: 40000
[34m[1mwandb[0m: 	target_update: 44


Training episode: 1/900
Episode 0	Average Score: -370.00
Training episode: 101/900
Episode 100	Average Score: -393.24
Training episode: 201/900
Episode 200	Average Score: -386.56
Training episode: 301/900
Episode 300	Average Score: -221.89
Training episode: 401/900
Episode 400	Average Score: -119.03
Training episode: 501/900
Episode 500	Average Score: -100.32
Training episode: 601/900
Episode 600	Average Score: -97.26
Training episode: 701/900
Episode 700	Average Score: -89.69
Training episode: 801/900
Episode 800	Average Score: -94.37
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-96.72


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fbnzax0w with config:
[34m[1mwandb[0m: 	batch_size: 286
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 488
[34m[1mwandb[0m: 	gamma: 0.49655436001601766
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 253
[34m[1mwandb[0m: 	memory_size: 10000
[34m[1mwandb[0m: 	target_update: 7


Training episode: 1/900
Episode 0	Average Score: -352.00
Training episode: 101/900
Episode 100	Average Score: -390.72
Training episode: 201/900
Episode 200	Average Score: -390.27
Training episode: 301/900
Episode 300	Average Score: -280.10
Training episode: 401/900
Episode 400	Average Score: -156.75
Training episode: 501/900
Episode 500	Average Score: -115.85
Training episode: 601/900
Episode 600	Average Score: -94.29
Training episode: 701/900
Episode 700	Average Score: -90.10
Training episode: 801/900
Episode 800	Average Score: -82.36
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-89.11


[34m[1mwandb[0m: Agent Starting Run: 044buojr with config:
[34m[1mwandb[0m: 	batch_size: 72
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 306
[34m[1mwandb[0m: 	gamma: 0.9029408113388998
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 346
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 36


Training episode: 1/900
Episode 0	Average Score: -388.00
Training episode: 101/900
Episode 100	Average Score: -387.10
Training episode: 201/900
Episode 200	Average Score: -392.05
Training episode: 301/900
Episode 300	Average Score: -248.75
Training episode: 401/900
Episode 400	Average Score: -137.26
Training episode: 501/900
Episode 500	Average Score: -111.34
Training episode: 601/900
Episode 600	Average Score: -104.23
Training episode: 701/900
Episode 700	Average Score: -102.43
Training episode: 801/900
Episode 800	Average Score: -103.06
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-103.87


[34m[1mwandb[0m: Agent Starting Run: 7hwp2zll with config:
[34m[1mwandb[0m: 	batch_size: 39
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 113
[34m[1mwandb[0m: 	gamma: 0.972784503029858
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 171
[34m[1mwandb[0m: 	memory_size: 512
[34m[1mwandb[0m: 	target_update: 22


Training episode: 1/900
Episode 0	Average Score: -415.00
Training episode: 101/900
Episode 100	Average Score: -387.57
Training episode: 201/900
Episode 200	Average Score: -383.82
Training episode: 301/900
Episode 300	Average Score: -166.33
Training episode: 401/900
Episode 400	Average Score: -104.05
Training episode: 501/900
Episode 500	Average Score: -102.25
Training episode: 601/900
Episode 600	Average Score: -102.70
Training episode: 701/900
Episode 700	Average Score: -103.51
Training episode: 801/900
Episode 800	Average Score: -102.70
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-103.24


[34m[1mwandb[0m: Agent Starting Run: mukp73m6 with config:
[34m[1mwandb[0m: 	batch_size: 507
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 195
[34m[1mwandb[0m: 	gamma: 0.4656747655600562
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 184
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 34


Training episode: 1/900
Episode 0	Average Score: -361.00
Training episode: 101/900
Episode 100	Average Score: -384.19
Training episode: 201/900
Episode 200	Average Score: -396.86
Training episode: 301/900
Episode 300	Average Score: -207.10
Training episode: 401/900
Episode 400	Average Score: -112.33
Training episode: 501/900
Episode 500	Average Score: -103.78
Training episode: 601/900
Episode 600	Average Score: -103.87
Training episode: 701/900
Episode 700	Average Score: -102.70
Training episode: 801/900
Episode 800	Average Score: -102.88
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.52


[34m[1mwandb[0m: Agent Starting Run: ccdblrf5 with config:
[34m[1mwandb[0m: 	batch_size: 132
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 341
[34m[1mwandb[0m: 	gamma: 0.942994383341778
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 151
[34m[1mwandb[0m: 	memory_size: 40000
[34m[1mwandb[0m: 	target_update: 2


Training episode: 1/900
Episode 0	Average Score: -442.00
Training episode: 101/900
Episode 100	Average Score: -389.02
Training episode: 201/900
Episode 200	Average Score: -393.40
Training episode: 301/900
Episode 300	Average Score: -218.17
Training episode: 401/900
Episode 400	Average Score: -48.85
Training episode: 501/900
Episode 500	Average Score: -17.86
Training episode: 601/900
Episode 600	Average Score: 5.38
Training episode: 701/900
Episode 700	Average Score: 8.29
Training episode: 801/900
Episode 800	Average Score: 8.04
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,7.11


[34m[1mwandb[0m: Agent Starting Run: 5bkdffl1 with config:
[34m[1mwandb[0m: 	batch_size: 476
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 441
[34m[1mwandb[0m: 	gamma: 0.604657695571788
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 85
[34m[1mwandb[0m: 	memory_size: 40000
[34m[1mwandb[0m: 	target_update: 2


Training episode: 1/900
Episode 0	Average Score: -424.00
Training episode: 101/900
Episode 100	Average Score: -387.91
Training episode: 201/900
Episode 200	Average Score: -391.82
Training episode: 301/900
Episode 300	Average Score: -235.00
Training episode: 401/900
Episode 400	Average Score: -62.79
Training episode: 501/900
Episode 500	Average Score: -32.24
Training episode: 601/900
Episode 600	Average Score: -24.05
Training episode: 701/900
Episode 700	Average Score: -20.68
Training episode: 801/900
Episode 800	Average Score: 0.23
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,3.89


[34m[1mwandb[0m: Agent Starting Run: k3kfwmfb with config:
[34m[1mwandb[0m: 	batch_size: 209
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 190
[34m[1mwandb[0m: 	gamma: 0.8816330712299898
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 44
[34m[1mwandb[0m: 	memory_size: 10000
[34m[1mwandb[0m: 	target_update: 3


Training episode: 1/900
Episode 0	Average Score: -478.00
Training episode: 101/900
Episode 100	Average Score: -385.80
Training episode: 201/900
Episode 200	Average Score: -378.06
Training episode: 301/900
Episode 300	Average Score: -178.76
Training episode: 401/900
Episode 400	Average Score: -43.80
Training episode: 501/900
Episode 500	Average Score: -27.41
Training episode: 601/900
Episode 600	Average Score: -33.25
Training episode: 701/900
Episode 700	Average Score: -37.57
Training episode: 801/900
Episode 800	Average Score: -24.97
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-24.21


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 486qnnu1 with config:
[34m[1mwandb[0m: 	batch_size: 325
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 102
[34m[1mwandb[0m: 	gamma: 0.1082540641546331
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 103
[34m[1mwandb[0m: 	memory_size: 1025
[34m[1mwandb[0m: 	target_update: 12


Training episode: 1/900
Episode 0	Average Score: -370.00
Training episode: 101/900
Episode 100	Average Score: -386.56
Training episode: 201/900
Episode 200	Average Score: -388.94
Training episode: 301/900
Episode 300	Average Score: -162.45
Training episode: 401/900
Episode 400	Average Score: -104.14
Training episode: 501/900
Episode 500	Average Score: -102.34
Training episode: 601/900
Episode 600	Average Score: -102.16
Training episode: 701/900
Episode 700	Average Score: -103.60
Training episode: 801/900
Episode 800	Average Score: -103.51
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-103.15


[34m[1mwandb[0m: Agent Starting Run: o5ao6x2a with config:
[34m[1mwandb[0m: 	batch_size: 98
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 469
[34m[1mwandb[0m: 	gamma: 0.8884950823020181
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 26
[34m[1mwandb[0m: 	memory_size: 1025
[34m[1mwandb[0m: 	target_update: 8


Training episode: 1/900
Episode 0	Average Score: -388.00
Training episode: 101/900
Episode 100	Average Score: -393.45
Training episode: 201/900
Episode 200	Average Score: -387.97
Training episode: 301/900
Episode 300	Average Score: -279.91
Training episode: 401/900
Episode 400	Average Score: -173.10
Training episode: 501/900
Episode 500	Average Score: -130.42
Training episode: 601/900
Episode 600	Average Score: -113.14
Training episode: 701/900
Episode 700	Average Score: -106.21
Training episode: 801/900
Episode 800	Average Score: -104.77
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.43


[34m[1mwandb[0m: Agent Starting Run: wwktmwu8 with config:
[34m[1mwandb[0m: 	batch_size: 215
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 454
[34m[1mwandb[0m: 	gamma: 0.23408104892790543
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 294
[34m[1mwandb[0m: 	memory_size: 10000
[34m[1mwandb[0m: 	target_update: 19


Training episode: 1/900
Episode 0	Average Score: -361.00
Training episode: 101/900
Episode 100	Average Score: -398.22
Training episode: 201/900
Episode 200	Average Score: -391.96
Training episode: 301/900
Episode 300	Average Score: -282.43
Training episode: 401/900
Episode 400	Average Score: -164.06
Training episode: 501/900
Episode 500	Average Score: -123.16
Training episode: 601/900
Episode 600	Average Score: -109.21
Training episode: 701/900
Episode 700	Average Score: -100.03
Training episode: 801/900
Episode 800	Average Score: -97.84
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-98.71


[34m[1mwandb[0m: Agent Starting Run: xhyr593y with config:
[34m[1mwandb[0m: 	batch_size: 161
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 325
[34m[1mwandb[0m: 	gamma: 0.5439895398855141
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 303
[34m[1mwandb[0m: 	memory_size: 60000
[34m[1mwandb[0m: 	target_update: 50


Training episode: 1/900
Episode 0	Average Score: -379.00
Training episode: 101/900
Episode 100	Average Score: -391.12
Training episode: 201/900
Episode 200	Average Score: -392.40
Training episode: 301/900
Episode 300	Average Score: -255.20
Training episode: 401/900
Episode 400	Average Score: -137.35
Training episode: 501/900
Episode 500	Average Score: -110.89
Training episode: 601/900
Episode 600	Average Score: -95.63
Training episode: 701/900
Episode 700	Average Score: -96.34
Training episode: 801/900
Episode 800	Average Score: -99.04
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-97.52


[34m[1mwandb[0m: Agent Starting Run: bn02juij with config:
[34m[1mwandb[0m: 	batch_size: 394
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 405
[34m[1mwandb[0m: 	gamma: 0.9109882832450495
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 371
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 35


Training episode: 1/900
Episode 0	Average Score: -415.00
Training episode: 101/900
Episode 100	Average Score: -398.09
Training episode: 201/900
Episode 200	Average Score: -392.25
Training episode: 301/900
Episode 300	Average Score: -269.06
Training episode: 401/900
Episode 400	Average Score: -156.91
Training episode: 501/900
Episode 500	Average Score: -125.74
Training episode: 601/900
Episode 600	Average Score: -107.38
Training episode: 701/900
Episode 700	Average Score: -105.94
Training episode: 801/900
Episode 800	Average Score: -105.85
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-104.77


[34m[1mwandb[0m: Agent Starting Run: lkupufn3 with config:
[34m[1mwandb[0m: 	batch_size: 339
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 128
[34m[1mwandb[0m: 	gamma: 0.43160883878222217
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 130
[34m[1mwandb[0m: 	memory_size: 40000
[34m[1mwandb[0m: 	target_update: 27


Training episode: 1/900
Episode 0	Average Score: -352.00
Training episode: 101/900
Episode 100	Average Score: -374.52
Training episode: 201/900
Episode 200	Average Score: -382.24
Training episode: 301/900
Episode 300	Average Score: -178.07
Training episode: 401/900
Episode 400	Average Score: -92.36
Training episode: 501/900
Episode 500	Average Score: -84.52
Training episode: 601/900
Episode 600	Average Score: -68.42
Training episode: 701/900
Episode 700	Average Score: -67.06
Training episode: 801/900
Episode 800	Average Score: -69.79
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-64.0


[34m[1mwandb[0m: Agent Starting Run: zjymkbl3 with config:
[34m[1mwandb[0m: 	batch_size: 187
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 71
[34m[1mwandb[0m: 	gamma: 0.8278240650363119
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 66
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 15


Training episode: 1/900
Episode 0	Average Score: -451.00
Training episode: 101/900
Episode 100	Average Score: -408.06
Training episode: 201/900
Episode 200	Average Score: -416.66
Training episode: 301/900
Episode 300	Average Score: -144.10
Training episode: 401/900
Episode 400	Average Score: -102.61
Training episode: 501/900
Episode 500	Average Score: -103.42
Training episode: 601/900
Episode 600	Average Score: -103.33
Training episode: 701/900
Episode 700	Average Score: -103.15
Training episode: 801/900
Episode 800	Average Score: -101.98
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.7


[34m[1mwandb[0m: Agent Starting Run: xsfmzwej with config:
[34m[1mwandb[0m: 	batch_size: 387
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 460
[34m[1mwandb[0m: 	gamma: 0.038248848425788504
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 99
[34m[1mwandb[0m: 	memory_size: 4096
[34m[1mwandb[0m: 	target_update: 39


Training episode: 1/900
Episode 0	Average Score: -451.00
Training episode: 101/900
Episode 100	Average Score: -395.39
Training episode: 201/900
Episode 200	Average Score: -401.99
Training episode: 301/900
Episode 300	Average Score: -283.13
Training episode: 401/900
Episode 400	Average Score: -165.49
Training episode: 501/900
Episode 500	Average Score: -124.12
Training episode: 601/900
Episode 600	Average Score: -111.16
Training episode: 701/900
Episode 700	Average Score: -106.03
Training episode: 801/900
Episode 800	Average Score: -103.15
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.42


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: uqeordky with config:
[34m[1mwandb[0m: 	batch_size: 364
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 171
[34m[1mwandb[0m: 	gamma: 0.26822084374388067
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 260
[34m[1mwandb[0m: 	memory_size: 10000
[34m[1mwandb[0m: 	target_update: 37


Training episode: 1/900
Episode 0	Average Score: -406.00
Training episode: 101/900
Episode 100	Average Score: -398.34
Training episode: 201/900
Episode 200	Average Score: -393.73
Training episode: 301/900
Episode 300	Average Score: -192.04
Training episode: 401/900
Episode 400	Average Score: -105.72
Training episode: 501/900
Episode 500	Average Score: -101.68
Training episode: 601/900
Episode 600	Average Score: -101.92
Training episode: 701/900
Episode 700	Average Score: -101.65
Training episode: 801/900
Episode 800	Average Score: -103.14
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.1


[34m[1mwandb[0m: Agent Starting Run: lqf4ojwo with config:
[34m[1mwandb[0m: 	batch_size: 343
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 281
[34m[1mwandb[0m: 	gamma: 0.3290558008455571
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 389
[34m[1mwandb[0m: 	memory_size: 1025
[34m[1mwandb[0m: 	target_update: 1


Training episode: 1/900
Episode 0	Average Score: -361.00
Training episode: 101/900
Episode 100	Average Score: -390.70
Training episode: 201/900
Episode 200	Average Score: -382.42
Training episode: 301/900
Episode 300	Average Score: -240.71
Training episode: 401/900
Episode 400	Average Score: -131.14
Training episode: 501/900
Episode 500	Average Score: -108.28
Training episode: 601/900
Episode 600	Average Score: -103.78
Training episode: 701/900
Episode 700	Average Score: -102.70
Training episode: 801/900
Episode 800	Average Score: -103.33
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-103.15


[34m[1mwandb[0m: Agent Starting Run: iibjakg9 with config:
[34m[1mwandb[0m: 	batch_size: 229
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 190
[34m[1mwandb[0m: 	gamma: 0.7400558300329381
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 127
[34m[1mwandb[0m: 	memory_size: 1025
[34m[1mwandb[0m: 	target_update: 10


Training episode: 1/900
Episode 0	Average Score: -397.00
Training episode: 101/900
Episode 100	Average Score: -396.68
Training episode: 201/900
Episode 200	Average Score: -397.01
Training episode: 301/900
Episode 300	Average Score: -214.13
Training episode: 401/900
Episode 400	Average Score: -110.89
Training episode: 501/900
Episode 500	Average Score: -103.87
Training episode: 601/900
Episode 600	Average Score: -103.60
Training episode: 701/900
Episode 700	Average Score: -103.24
Training episode: 801/900
Episode 800	Average Score: -102.88
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-103.51


[34m[1mwandb[0m: Agent Starting Run: 3yq26x7r with config:
[34m[1mwandb[0m: 	batch_size: 98
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 314
[34m[1mwandb[0m: 	gamma: 0.9576075009678694
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 112
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 30


Training episode: 1/900
Episode 0	Average Score: -397.00
Training episode: 101/900
Episode 100	Average Score: -390.87
Training episode: 201/900
Episode 200	Average Score: -389.55
Training episode: 301/900
Episode 300	Average Score: -251.97
Training episode: 401/900
Episode 400	Average Score: -132.13
Training episode: 501/900
Episode 500	Average Score: -108.91
Training episode: 601/900
Episode 600	Average Score: -104.05
Training episode: 701/900
Episode 700	Average Score: -102.43
Training episode: 801/900
Episode 800	Average Score: -102.70
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.34


[34m[1mwandb[0m: Agent Starting Run: 1xr87scz with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 323
[34m[1mwandb[0m: 	gamma: 0.06388981031779906
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 32
[34m[1mwandb[0m: 	memory_size: 40000
[34m[1mwandb[0m: 	target_update: 1


Training episode: 1/900
Episode 0	Average Score: -406.00
Training episode: 101/900
Episode 100	Average Score: -386.32
Training episode: 201/900
Episode 200	Average Score: -391.39
Training episode: 301/900
Episode 300	Average Score: -254.73
Training episode: 401/900
Episode 400	Average Score: -137.88
Training episode: 501/900
Episode 500	Average Score: -111.45
Training episode: 601/900
Episode 600	Average Score: -103.02
Training episode: 701/900
Episode 700	Average Score: -101.58
Training episode: 801/900
Episode 800	Average Score: -101.55
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-101.42


[34m[1mwandb[0m: Agent Starting Run: h9mhy1vb with config:
[34m[1mwandb[0m: 	batch_size: 355
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 127
[34m[1mwandb[0m: 	gamma: 0.18519524129977377
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 44
[34m[1mwandb[0m: 	memory_size: 10000
[34m[1mwandb[0m: 	target_update: 49


Training episode: 1/900
Episode 0	Average Score: -361.00
Training episode: 101/900
Episode 100	Average Score: -381.09
Training episode: 201/900
Episode 200	Average Score: -389.17
Training episode: 301/900
Episode 300	Average Score: -171.37
Training episode: 401/900
Episode 400	Average Score: -104.32
Training episode: 501/900
Episode 500	Average Score: -102.97
Training episode: 601/900
Episode 600	Average Score: -103.06
Training episode: 701/900
Episode 700	Average Score: -102.88
Training episode: 801/900
Episode 800	Average Score: -102.61
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.97


[34m[1mwandb[0m: Agent Starting Run: y6093v0q with config:
[34m[1mwandb[0m: 	batch_size: 52
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 300
[34m[1mwandb[0m: 	gamma: 0.020481896252430156
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 338
[34m[1mwandb[0m: 	memory_size: 60000
[34m[1mwandb[0m: 	target_update: 19


Training episode: 1/900
Episode 0	Average Score: -397.00
Training episode: 101/900
Episode 100	Average Score: -396.82
Training episode: 201/900
Episode 200	Average Score: -386.03
Training episode: 301/900
Episode 300	Average Score: -243.10
Training episode: 401/900
Episode 400	Average Score: -131.21
Training episode: 501/900
Episode 500	Average Score: -108.96
Training episode: 601/900
Episode 600	Average Score: -104.59
Training episode: 701/900
Episode 700	Average Score: -101.88
Training episode: 801/900
Episode 800	Average Score: -102.93
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.43


[34m[1mwandb[0m: Agent Starting Run: 6s4yxiz0 with config:
[34m[1mwandb[0m: 	batch_size: 232
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 296
[34m[1mwandb[0m: 	gamma: 0.6684150339615155
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 23
[34m[1mwandb[0m: 	memory_size: 512
[34m[1mwandb[0m: 	target_update: 3


Training episode: 1/900
Episode 0	Average Score: -469.00
Training episode: 101/900
Episode 100	Average Score: -391.44
Training episode: 201/900
Episode 200	Average Score: -390.30
Training episode: 301/900
Episode 300	Average Score: -242.98
Training episode: 401/900
Episode 400	Average Score: -133.21
Training episode: 501/900
Episode 500	Average Score: -110.17
Training episode: 601/900
Episode 600	Average Score: -104.23
Training episode: 701/900
Episode 700	Average Score: -102.52
Training episode: 801/900
Episode 800	Average Score: -103.42
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-103.51


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8h0gy4zs with config:
[34m[1mwandb[0m: 	batch_size: 415
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 243
[34m[1mwandb[0m: 	gamma: 0.47384881652527666
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 197
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 47


Training episode: 1/900
Episode 0	Average Score: -298.00
Training episode: 101/900
Episode 100	Average Score: -394.52
Training episode: 201/900
Episode 200	Average Score: -399.63
Training episode: 301/900
Episode 300	Average Score: -229.27
Training episode: 401/900
Episode 400	Average Score: -122.05
Training episode: 501/900
Episode 500	Average Score: -106.39
Training episode: 601/900
Episode 600	Average Score: -103.60
Training episode: 701/900
Episode 700	Average Score: -102.52
Training episode: 801/900
Episode 800	Average Score: -103.24
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.61


[34m[1mwandb[0m: Agent Starting Run: 8sk49pxc with config:
[34m[1mwandb[0m: 	batch_size: 225
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 221
[34m[1mwandb[0m: 	gamma: 0.4895963794957548
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 301
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 30


Training episode: 1/900
Episode 0	Average Score: -388.00
Training episode: 101/900
Episode 100	Average Score: -378.11
Training episode: 201/900
Episode 200	Average Score: -391.78
Training episode: 301/900
Episode 300	Average Score: -224.02
Training episode: 401/900
Episode 400	Average Score: -115.03
Training episode: 501/900
Episode 500	Average Score: -106.30
Training episode: 601/900
Episode 600	Average Score: -101.62
Training episode: 701/900
Episode 700	Average Score: -104.86
Training episode: 801/900
Episode 800	Average Score: -104.50
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-103.78


[34m[1mwandb[0m: Agent Starting Run: 2ljh2tzz with config:
[34m[1mwandb[0m: 	batch_size: 18
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 58
[34m[1mwandb[0m: 	gamma: 0.3668805620320955
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 203
[34m[1mwandb[0m: 	memory_size: 40000
[34m[1mwandb[0m: 	target_update: 25


Training episode: 1/900
Episode 0	Average Score: -379.00
Training episode: 101/900
Episode 100	Average Score: -397.87
Training episode: 201/900
Episode 200	Average Score: -403.66
Training episode: 301/900
Episode 300	Average Score: -134.29
Training episode: 401/900
Episode 400	Average Score: -103.69
Training episode: 501/900
Episode 500	Average Score: -102.18
Training episode: 601/900
Episode 600	Average Score: -103.60
Training episode: 701/900
Episode 700	Average Score: -102.71
Training episode: 801/900
Episode 800	Average Score: -101.33
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.73


[34m[1mwandb[0m: Agent Starting Run: ks7yyzg8 with config:
[34m[1mwandb[0m: 	batch_size: 160
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 228
[34m[1mwandb[0m: 	gamma: 0.8454798857565531
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 234
[34m[1mwandb[0m: 	memory_size: 40000
[34m[1mwandb[0m: 	target_update: 16


Training episode: 1/900
Episode 0	Average Score: -361.00
Training episode: 101/900
Episode 100	Average Score: -385.18
Training episode: 201/900
Episode 200	Average Score: -394.92
Training episode: 301/900
Episode 300	Average Score: -215.11
Training episode: 401/900
Episode 400	Average Score: -87.50
Training episode: 501/900
Episode 500	Average Score: -53.61
Training episode: 601/900
Episode 600	Average Score: -54.06
Training episode: 701/900
Episode 700	Average Score: -54.36
Training episode: 801/900
Episode 800	Average Score: -43.09
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-50.87


[34m[1mwandb[0m: Agent Starting Run: 6913045o with config:
[34m[1mwandb[0m: 	batch_size: 138
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 263
[34m[1mwandb[0m: 	gamma: 0.28079407168880594
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 252
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 10


Training episode: 1/900
Episode 0	Average Score: -415.00
Training episode: 101/900
Episode 100	Average Score: -384.10
Training episode: 201/900
Episode 200	Average Score: -387.79
Training episode: 301/900
Episode 300	Average Score: -233.07
Training episode: 401/900
Episode 400	Average Score: -124.39
Training episode: 501/900
Episode 500	Average Score: -105.67
Training episode: 601/900
Episode 600	Average Score: -102.61
Training episode: 701/900
Episode 700	Average Score: -102.88
Training episode: 801/900
Episode 800	Average Score: -103.06
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.7


[34m[1mwandb[0m: Agent Starting Run: mtirv3ha with config:
[34m[1mwandb[0m: 	batch_size: 21
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 420
[34m[1mwandb[0m: 	gamma: 0.22621032296840227
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 277
[34m[1mwandb[0m: 	memory_size: 512
[34m[1mwandb[0m: 	target_update: 32


Training episode: 1/900
Episode 0	Average Score: -424.00
Training episode: 101/900
Episode 100	Average Score: -382.11
Training episode: 201/900
Episode 200	Average Score: -399.48
Training episode: 301/900
Episode 300	Average Score: -278.55
Training episode: 401/900
Episode 400	Average Score: -160.66
Training episode: 501/900
Episode 500	Average Score: -122.77
Training episode: 601/900
Episode 600	Average Score: -109.00
Training episode: 701/900
Episode 700	Average Score: -104.59
Training episode: 801/900
Episode 800	Average Score: -103.06
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.88


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rm9igsi5 with config:
[34m[1mwandb[0m: 	batch_size: 337
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 262
[34m[1mwandb[0m: 	gamma: 0.43101006573175094
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 26
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 9


Training episode: 1/900
Episode 0	Average Score: -406.00
Training episode: 101/900
Episode 100	Average Score: -384.01
Training episode: 201/900
Episode 200	Average Score: -386.56
Training episode: 301/900
Episode 300	Average Score: -240.31
Training episode: 401/900
Episode 400	Average Score: -121.32
Training episode: 501/900
Episode 500	Average Score: -106.84
Training episode: 601/900
Episode 600	Average Score: -103.33
Training episode: 701/900
Episode 700	Average Score: -102.25
Training episode: 801/900
Episode 800	Average Score: -102.79
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-103.06


[34m[1mwandb[0m: Agent Starting Run: pqy1irp0 with config:
[34m[1mwandb[0m: 	batch_size: 48
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 336
[34m[1mwandb[0m: 	gamma: 0.5413775949650805
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 91
[34m[1mwandb[0m: 	memory_size: 40000
[34m[1mwandb[0m: 	target_update: 10


Training episode: 1/900
Episode 0	Average Score: -379.00
Training episode: 101/900
Episode 100	Average Score: -403.75
Training episode: 201/900
Episode 200	Average Score: -385.88
Training episode: 301/900
Episode 300	Average Score: -253.08
Training episode: 401/900
Episode 400	Average Score: -130.83
Training episode: 501/900
Episode 500	Average Score: -97.25
Training episode: 601/900
Episode 600	Average Score: -80.60
Training episode: 701/900
Episode 700	Average Score: -69.87
Training episode: 801/900
Episode 800	Average Score: -65.03
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-46.77


[34m[1mwandb[0m: Agent Starting Run: v8y32gtp with config:
[34m[1mwandb[0m: 	batch_size: 354
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 459
[34m[1mwandb[0m: 	gamma: 0.3404690081228552
[34m[1mwandb[0m: 	loss: huber
[34m[1mwandb[0m: 	lr_decay: 15
[34m[1mwandb[0m: 	memory_size: 512
[34m[1mwandb[0m: 	target_update: 38


Training episode: 1/900
Episode 0	Average Score: -379.00
Training episode: 101/900
Episode 100	Average Score: -387.43
Training episode: 201/900
Episode 200	Average Score: -395.88
Training episode: 301/900
Episode 300	Average Score: -290.93
Training episode: 401/900
Episode 400	Average Score: -172.18
Training episode: 501/900
Episode 500	Average Score: -126.28
Training episode: 601/900
Episode 600	Average Score: -110.44
Training episode: 701/900
Episode 700	Average Score: -106.66
Training episode: 801/900
Episode 800	Average Score: -103.78
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.61


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: al5j5914 with config:
[34m[1mwandb[0m: 	batch_size: 458
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 161
[34m[1mwandb[0m: 	gamma: 0.9873471168853172
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 374
[34m[1mwandb[0m: 	memory_size: 60000
[34m[1mwandb[0m: 	target_update: 30


Training episode: 1/900
Episode 0	Average Score: -406.00
Training episode: 101/900
Episode 100	Average Score: -389.38
Training episode: 201/900
Episode 200	Average Score: -385.32
Training episode: 301/900
Episode 300	Average Score: -196.21
Training episode: 401/900
Episode 400	Average Score: -109.72
Training episode: 501/900
Episode 500	Average Score: -100.70
Training episode: 601/900
Episode 600	Average Score: -90.54
Training episode: 701/900
Episode 700	Average Score: -88.24
Training episode: 801/900
Episode 800	Average Score: -69.53
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-88.92


[34m[1mwandb[0m: Agent Starting Run: hicdfeh1 with config:
[34m[1mwandb[0m: 	batch_size: 118
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 92
[34m[1mwandb[0m: 	gamma: 0.5190345213953869
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 326
[34m[1mwandb[0m: 	memory_size: 1025
[34m[1mwandb[0m: 	target_update: 34


Training episode: 1/900
Episode 0	Average Score: -325.00
Training episode: 101/900
Episode 100	Average Score: -381.52
Training episode: 201/900
Episode 200	Average Score: -383.75
Training episode: 301/900
Episode 300	Average Score: -155.14
Training episode: 401/900
Episode 400	Average Score: -103.42
Training episode: 501/900
Episode 500	Average Score: -102.52
Training episode: 601/900
Episode 600	Average Score: -102.16
Training episode: 701/900
Episode 700	Average Score: -103.15
Training episode: 801/900
Episode 800	Average Score: -102.97
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.43


[34m[1mwandb[0m: Agent Starting Run: djwztov4 with config:
[34m[1mwandb[0m: 	batch_size: 242
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 485
[34m[1mwandb[0m: 	gamma: 0.37805371480914407
[34m[1mwandb[0m: 	loss: mae
[34m[1mwandb[0m: 	lr_decay: 186
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 4


Training episode: 1/900
Episode 0	Average Score: -370.00
Training episode: 101/900
Episode 100	Average Score: -390.86
Training episode: 201/900
Episode 200	Average Score: -389.89
Training episode: 301/900
Episode 300	Average Score: -286.12
Training episode: 401/900
Episode 400	Average Score: -176.39
Training episode: 501/900
Episode 500	Average Score: -128.08
Training episode: 601/900
Episode 600	Average Score: -113.68
Training episode: 701/900
Episode 700	Average Score: -106.39
Training episode: 801/900
Episode 800	Average Score: -104.68
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-105.58


[34m[1mwandb[0m: Agent Starting Run: tw9bmpdg with config:
[34m[1mwandb[0m: 	batch_size: 181
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 344
[34m[1mwandb[0m: 	gamma: 0.21048862497411747
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 191
[34m[1mwandb[0m: 	memory_size: 2048
[34m[1mwandb[0m: 	target_update: 5


Training episode: 1/900
Episode 0	Average Score: -424.00
Training episode: 101/900
Episode 100	Average Score: -395.57
Training episode: 201/900
Episode 200	Average Score: -387.43
Training episode: 301/900
Episode 300	Average Score: -258.68
Training episode: 401/900
Episode 400	Average Score: -143.72
Training episode: 501/900
Episode 500	Average Score: -112.27
Training episode: 601/900
Episode 600	Average Score: -103.57
Training episode: 701/900
Episode 700	Average Score: -101.57
Training episode: 801/900
Episode 800	Average Score: -103.06
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.25


[34m[1mwandb[0m: Agent Starting Run: c2ct5oe2 with config:
[34m[1mwandb[0m: 	batch_size: 409
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 137
[34m[1mwandb[0m: 	gamma: 0.9616054536600162
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 271
[34m[1mwandb[0m: 	memory_size: 60000
[34m[1mwandb[0m: 	target_update: 21


Training episode: 1/900
Episode 0	Average Score: -397.00
Training episode: 101/900
Episode 100	Average Score: -411.15
Training episode: 201/900
Episode 200	Average Score: -408.41
Training episode: 301/900
Episode 300	Average Score: -176.73
Training episode: 401/900
Episode 400	Average Score: -102.19
Training episode: 501/900
Episode 500	Average Score: -94.50
Training episode: 601/900
Episode 600	Average Score: -80.42
Training episode: 701/900
Episode 700	Average Score: -80.80
Training episode: 801/900
Episode 800	Average Score: -73.18
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-83.03


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: m5gfn0ef with config:
[34m[1mwandb[0m: 	batch_size: 166
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 444
[34m[1mwandb[0m: 	gamma: 0.8859243332411497
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 203
[34m[1mwandb[0m: 	memory_size: 1025
[34m[1mwandb[0m: 	target_update: 28


Training episode: 1/900
Episode 0	Average Score: -388.00
Training episode: 101/900
Episode 100	Average Score: -395.63
Training episode: 201/900
Episode 200	Average Score: -392.66
Training episode: 301/900
Episode 300	Average Score: -279.56
Training episode: 401/900
Episode 400	Average Score: -164.40
Training episode: 501/900
Episode 500	Average Score: -126.82
Training episode: 601/900
Episode 600	Average Score: -110.62
Training episode: 701/900
Episode 700	Average Score: -104.95
Training episode: 801/900
Episode 800	Average Score: -105.22
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-103.42


[34m[1mwandb[0m: Agent Starting Run: umcmest5 with config:
[34m[1mwandb[0m: 	batch_size: 421
[34m[1mwandb[0m: 	epochs: 900
[34m[1mwandb[0m: 	eps_decay: 402
[34m[1mwandb[0m: 	gamma: 0.49159222641724026
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	lr_decay: 371
[34m[1mwandb[0m: 	memory_size: 60000
[34m[1mwandb[0m: 	target_update: 48


Training episode: 1/900
Episode 0	Average Score: -343.00
Training episode: 101/900
Episode 100	Average Score: -389.82
Training episode: 201/900
Episode 200	Average Score: -388.10
Training episode: 301/900
Episode 300	Average Score: -280.09
Training episode: 401/900
Episode 400	Average Score: -145.54
Training episode: 501/900
Episode 500	Average Score: -116.49
Training episode: 601/900
Episode 600	Average Score: -100.67
Training episode: 701/900
Episode 700	Average Score: -66.07
Training episode: 801/900
Episode 800	Average Score: -45.72
Training episode: 900/900


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-29.79
