### Import

In [5]:
import wandb
from src.ppo.ppo import PPOAgent, Config
import gym

### Initialize the sweep

In [6]:
# Random-search sweep whose objective is to maximize the "reward" metric
# logged by each run.
sweep_config = {
    "name": "ppo-sweep",
    "method": "random",
    "metric": {"name": "reward", "goal": "maximize"},
    "parameters": {
        # Held constant for every run of the sweep.
        "training_step": {"value": 100000},
        # Searched ranges, given as inclusive-looking min/max bounds.
        "lr": {"min": 0.0001, "max": 0.001},
        "gamma": {"min": 0.01, "max": 0.99},
        "batch_size": {"min": 500, "max": 4000},
        "n_update": {"min": 1, "max": 50},
        "clip": {"min": 0.1, "max": 0.9},
    },
}

# Register the sweep and keep its id for the agent.
# NOTE(review): no project is passed here, so the sweep is filed under the
# default "uncategorized" project (see the sweep URL printed below).
sweep_id = wandb.sweep(sweep_config)

Create sweep with ID: p75uyita
Sweep URL: https://wandb.ai/hugoallegaert/uncategorized/sweeps/p75uyita


### Initialize env

In [7]:
# Build the Gym environment that the training function uses.
env_name = "Taxi-v3"
env = gym.make(env_name)

# Summarize the spaces: number of discrete observations, then the second and
# first dimensions of the environment's `desc` array, then the action count.
print(f"Observation: {env.observation_space.n} {env.desc.shape[1]} {env.desc.shape[0]}")
print(f"Action: {env.action_space.n}")

Observation: 500 11 7
Action: 6


### Run agent

In [8]:
def train():
    """Run a single sweep trial: train a PPO agent and log its reward.

    Intended to be passed to ``wandb.agent`` and called without arguments.
    The hyperparameters for the trial are read from the run's config, the
    agent is trained on the notebook-level ``env``, and the value returned
    by ``PPOAgent.fit()`` is logged under ``"reward"`` -- the metric name
    the sweep configuration is set up to maximize.
    """
    with wandb.init() as run:
        # Read the trial's hyperparameters from the run handle itself rather
        # than from the module-level wandb.config proxy.
        config = run.config

        agent_config = Config(
            training_step=config["training_step"],
            lr=config["lr"],
            # The settings below are fixed; they are not part of the sweep.
            lr_min=False,
            lr_decay=False,
            gamma=config["gamma"],
            loss='mse',
            # The sweep parameter is named "n_update"; Config takes "n_updates".
            n_updates=config["n_update"],
            batch_size=config["batch_size"],
            epsilon=0.5,
            clip=config["clip"],
        )

        model = PPOAgent(env=env, config=agent_config)
        reward = model.fit()  # model training

        run.log({"reward": reward})

# Upper bound on how many sweep runs this agent executes.
count = 50

# Start an agent for the sweep created above; it calls `train` once per run.
wandb.agent(sweep_id=sweep_id, function=train, count=count)

[34m[1mwandb[0m: Agent Starting Run: kahx5ola with config:
[34m[1mwandb[0m: 	batch_size: 2041
[34m[1mwandb[0m: 	clip: 0.45299368911859894
[34m[1mwandb[0m: 	gamma: 0.13032392448185276
[34m[1mwandb[0m: 	lr: 0.0006694116797368745
[34m[1mwandb[0m: 	n_update: 45
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: ev2vdcfm with config:
[34m[1mwandb[0m: 	batch_size: 3233
[34m[1mwandb[0m: 	clip: 0.29845518877077365
[34m[1mwandb[0m: 	gamma: 0.7477062389770642
[34m[1mwandb[0m: 	lr: 0.000196634878747953
[34m[1mwandb[0m: 	n_update: 7
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-102.18182


[34m[1mwandb[0m: Agent Starting Run: nfzhq75q with config:
[34m[1mwandb[0m: 	batch_size: 3328
[34m[1mwandb[0m: 	clip: 0.23325494842008024
[34m[1mwandb[0m: 	gamma: 0.7288601824484812
[34m[1mwandb[0m: 	lr: 0.0007464554354614206
[34m[1mwandb[0m: 	n_update: 37
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: qi30svuy with config:
[34m[1mwandb[0m: 	batch_size: 1982
[34m[1mwandb[0m: 	clip: 0.8224652813357181
[34m[1mwandb[0m: 	gamma: 0.2545512261495047
[34m[1mwandb[0m: 	lr: 0.0003449525109690423
[34m[1mwandb[0m: 	n_update: 21
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: g2k7lppn with config:
[34m[1mwandb[0m: 	batch_size: 3507
[34m[1mwandb[0m: 	clip: 0.4714507014301466
[34m[1mwandb[0m: 	gamma: 0.1063236216923471
[34m[1mwandb[0m: 	lr: 0.0006579554788081062
[34m[1mwandb[0m: 	n_update: 9
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d2p0zga9 with config:
[34m[1mwandb[0m: 	batch_size: 3752
[34m[1mwandb[0m: 	clip: 0.2135132871461357
[34m[1mwandb[0m: 	gamma: 0.16363022816748996
[34m[1mwandb[0m: 	lr: 0.00023924625088159009
[34m[1mwandb[0m: 	n_update: 14
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: p4gscand with config:
[34m[1mwandb[0m: 	batch_size: 3584
[34m[1mwandb[0m: 	clip: 0.37873837159387747
[34m[1mwandb[0m: 	gamma: 0.9830288093988944
[34m[1mwandb[0m: 	lr: 0.0006008598035539262
[34m[1mwandb[0m: 	n_update: 28
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: hgedv7fx with config:
[34m[1mwandb[0m: 	batch_size: 700
[34m[1mwandb[0m: 	clip: 0.21897164239534855
[34m[1mwandb[0m: 	gamma: 0.13809758374917663
[34m[1mwandb[0m: 	lr: 0.0004957803522070033
[34m[1mwandb[0m: 	n_update: 24
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ery6y116 with config:
[34m[1mwandb[0m: 	batch_size: 1823
[34m[1mwandb[0m: 	clip: 0.20578937508465409
[34m[1mwandb[0m: 	gamma: 0.423003237868386
[34m[1mwandb[0m: 	lr: 0.0001743196442685273
[34m[1mwandb[0m: 	n_update: 18
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.94737


[34m[1mwandb[0m: Agent Starting Run: 664w9lt0 with config:
[34m[1mwandb[0m: 	batch_size: 2229
[34m[1mwandb[0m: 	clip: 0.45542612221676826
[34m[1mwandb[0m: 	gamma: 0.3956916409222072
[34m[1mwandb[0m: 	lr: 0.00017689315380045232
[34m[1mwandb[0m: 	n_update: 50
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a4pd35z5 with config:
[34m[1mwandb[0m: 	batch_size: 3915
[34m[1mwandb[0m: 	clip: 0.4689002240217327
[34m[1mwandb[0m: 	gamma: 0.3916340553335336
[34m[1mwandb[0m: 	lr: 0.0009190321536137936
[34m[1mwandb[0m: 	n_update: 28
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 26t719fe with config:
[34m[1mwandb[0m: 	batch_size: 2035
[34m[1mwandb[0m: 	clip: 0.27715362531536936
[34m[1mwandb[0m: 	gamma: 0.26728607954774536
[34m[1mwandb[0m: 	lr: 0.0008874816553973887
[34m[1mwandb[0m: 	n_update: 21
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: lez3w4x0 with config:
[34m[1mwandb[0m: 	batch_size: 1179
[34m[1mwandb[0m: 	clip: 0.6554726272849767
[34m[1mwandb[0m: 	gamma: 0.9196094843859768
[34m[1mwandb[0m: 	lr: 0.00014145339664771306
[34m[1mwandb[0m: 	n_update: 1
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -343


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-331.0


[34m[1mwandb[0m: Agent Starting Run: yletfks8 with config:
[34m[1mwandb[0m: 	batch_size: 1820
[34m[1mwandb[0m: 	clip: 0.28148326470701934
[34m[1mwandb[0m: 	gamma: 0.17124070150892384
[34m[1mwandb[0m: 	lr: 0.00045100921366299327
[34m[1mwandb[0m: 	n_update: 9
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: exu52z4a with config:
[34m[1mwandb[0m: 	batch_size: 2835
[34m[1mwandb[0m: 	clip: 0.1654040435176506
[34m[1mwandb[0m: 	gamma: 0.8075515731481343
[34m[1mwandb[0m: 	lr: 0.0004589360958948044
[34m[1mwandb[0m: 	n_update: 7
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lr6yhhud with config:
[34m[1mwandb[0m: 	batch_size: 2469
[34m[1mwandb[0m: 	clip: 0.302574619382741
[34m[1mwandb[0m: 	gamma: 0.8530380298586135
[34m[1mwandb[0m: 	lr: 0.0004235486707455739
[34m[1mwandb[0m: 	n_update: 6
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pwgafvjv with config:
[34m[1mwandb[0m: 	batch_size: 1352
[34m[1mwandb[0m: 	clip: 0.5458802249545375
[34m[1mwandb[0m: 	gamma: 0.33451199801234427
[34m[1mwandb[0m: 	lr: 0.0004352806086827783
[34m[1mwandb[0m: 	n_update: 25
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: x3zq16a0 with config:
[34m[1mwandb[0m: 	batch_size: 2873
[34m[1mwandb[0m: 	clip: 0.843152982244454
[34m[1mwandb[0m: 	gamma: 0.3203018551901997
[34m[1mwandb[0m: 	lr: 0.00023464833825364545
[34m[1mwandb[0m: 	n_update: 7
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-101.24138


[34m[1mwandb[0m: Agent Starting Run: sf0p9umk with config:
[34m[1mwandb[0m: 	batch_size: 3461
[34m[1mwandb[0m: 	clip: 0.5308163307126745
[34m[1mwandb[0m: 	gamma: 0.04683537449425208
[34m[1mwandb[0m: 	lr: 0.000960006721948156
[34m[1mwandb[0m: 	n_update: 33
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: qxwhveg0 with config:
[34m[1mwandb[0m: 	batch_size: 738
[34m[1mwandb[0m: 	clip: 0.7893795811920917
[34m[1mwandb[0m: 	gamma: 0.5041533993817341
[34m[1mwandb[0m: 	lr: 0.0005188085458832195
[34m[1mwandb[0m: 	n_update: 19
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7g2btwsr with config:
[34m[1mwandb[0m: 	batch_size: 968
[34m[1mwandb[0m: 	clip: 0.5768694265408846
[34m[1mwandb[0m: 	gamma: 0.5519035044402145
[34m[1mwandb[0m: 	lr: 0.0001633959608462495
[34m[1mwandb[0m: 	n_update: 41
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 5oek0e1l with config:
[34m[1mwandb[0m: 	batch_size: 2078
[34m[1mwandb[0m: 	clip: 0.2359379983547023
[34m[1mwandb[0m: 	gamma: 0.32866695051777434
[34m[1mwandb[0m: 	lr: 0.0005206175754036423
[34m[1mwandb[0m: 	n_update: 44
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 8rmpsuq1 with config:
[34m[1mwandb[0m: 	batch_size: 2613
[34m[1mwandb[0m: 	clip: 0.6803340735054143
[34m[1mwandb[0m: 	gamma: 0.480841155808667
[34m[1mwandb[0m: 	lr: 0.0006098492364251376
[34m[1mwandb[0m: 	n_update: 39
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 09jonijj with config:
[34m[1mwandb[0m: 	batch_size: 1111
[34m[1mwandb[0m: 	clip: 0.415270129328558
[34m[1mwandb[0m: 	gamma: 0.4257918248535707
[34m[1mwandb[0m: 	lr: 0.00049054240695325
[34m[1mwandb[0m: 	n_update: 9
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: srpw3is7 with config:
[34m[1mwandb[0m: 	batch_size: 3579
[34m[1mwandb[0m: 	clip: 0.2905572756302518
[34m[1mwandb[0m: 	gamma: 0.20961640883461008
[34m[1mwandb[0m: 	lr: 0.0003310811459221828
[34m[1mwandb[0m: 	n_update: 23
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: cuask2hh with config:
[34m[1mwandb[0m: 	batch_size: 3275
[34m[1mwandb[0m: 	clip: 0.6854981275598716
[34m[1mwandb[0m: 	gamma: 0.8751083768065927
[34m[1mwandb[0m: 	lr: 0.0006176460092321585
[34m[1mwandb[0m: 	n_update: 17
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 91fs3c32 with config:
[34m[1mwandb[0m: 	batch_size: 2867
[34m[1mwandb[0m: 	clip: 0.1798980880598392
[34m[1mwandb[0m: 	gamma: 0.6467367722241253
[34m[1mwandb[0m: 	lr: 0.000861716737315329
[34m[1mwandb[0m: 	n_update: 27
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 55weosqx with config:
[34m[1mwandb[0m: 	batch_size: 2591
[34m[1mwandb[0m: 	clip: 0.736006819151814
[34m[1mwandb[0m: 	gamma: 0.2574065053976786
[34m[1mwandb[0m: 	lr: 0.000400599431090878
[34m[1mwandb[0m: 	n_update: 2
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -190


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-140.5


[34m[1mwandb[0m: Agent Starting Run: zy0ca2kz with config:
[34m[1mwandb[0m: 	batch_size: 1638
[34m[1mwandb[0m: 	clip: 0.3454551909913522
[34m[1mwandb[0m: 	gamma: 0.5530794373086856
[34m[1mwandb[0m: 	lr: 0.0007212402924483537
[34m[1mwandb[0m: 	n_update: 23
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: cwn45fo1 with config:
[34m[1mwandb[0m: 	batch_size: 2362
[34m[1mwandb[0m: 	clip: 0.3805212100636355
[34m[1mwandb[0m: 	gamma: 0.698312467714567
[34m[1mwandb[0m: 	lr: 0.00021263153562136043
[34m[1mwandb[0m: 	n_update: 37
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.375


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 836idis7 with config:
[34m[1mwandb[0m: 	batch_size: 3680
[34m[1mwandb[0m: 	clip: 0.8606045647422683
[34m[1mwandb[0m: 	gamma: 0.28819708609208466
[34m[1mwandb[0m: 	lr: 0.0006138388219708029
[34m[1mwandb[0m: 	n_update: 14
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: al1ib1wa with config:
[34m[1mwandb[0m: 	batch_size: 3892
[34m[1mwandb[0m: 	clip: 0.4427621073980017
[34m[1mwandb[0m: 	gamma: 0.19732153614310713
[34m[1mwandb[0m: 	lr: 0.00023924659598901712
[34m[1mwandb[0m: 	n_update: 45
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 1ru1u4a0 with config:
[34m[1mwandb[0m: 	batch_size: 2737
[34m[1mwandb[0m: 	clip: 0.4240267402233697
[34m[1mwandb[0m: 	gamma: 0.5814946280436613
[34m[1mwandb[0m: 	lr: 0.0008430535584776421
[34m[1mwandb[0m: 	n_update: 32
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: iybxaci9 with config:
[34m[1mwandb[0m: 	batch_size: 2395
[34m[1mwandb[0m: 	clip: 0.7861525611771233
[34m[1mwandb[0m: 	gamma: 0.9028541452224588
[34m[1mwandb[0m: 	lr: 0.0001160349241918308
[34m[1mwandb[0m: 	n_update: 30
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 40t13b7c with config:
[34m[1mwandb[0m: 	batch_size: 954
[34m[1mwandb[0m: 	clip: 0.4487554196632807
[34m[1mwandb[0m: 	gamma: 0.3593273963465326
[34m[1mwandb[0m: 	lr: 0.0008088470872985966
[34m[1mwandb[0m: 	n_update: 33
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: wwn30n4f with config:
[34m[1mwandb[0m: 	batch_size: 1241
[34m[1mwandb[0m: 	clip: 0.691932309670058
[34m[1mwandb[0m: 	gamma: 0.9280528662869914
[34m[1mwandb[0m: 	lr: 0.0008920242491574958
[34m[1mwandb[0m: 	n_update: 18
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: nxyqtbls with config:
[34m[1mwandb[0m: 	batch_size: 634
[34m[1mwandb[0m: 	clip: 0.3810529226163001
[34m[1mwandb[0m: 	gamma: 0.5866642551808561
[34m[1mwandb[0m: 	lr: 0.0003714865428671978
[34m[1mwandb[0m: 	n_update: 33
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: bemt9kcv with config:
[34m[1mwandb[0m: 	batch_size: 651
[34m[1mwandb[0m: 	clip: 0.4630024099859094
[34m[1mwandb[0m: 	gamma: 0.18181812112848
[34m[1mwandb[0m: 	lr: 0.0008187369228309728
[34m[1mwandb[0m: 	n_update: 40
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dm5i7m74 with config:
[34m[1mwandb[0m: 	batch_size: 529
[34m[1mwandb[0m: 	clip: 0.7613841626821489
[34m[1mwandb[0m: 	gamma: 0.8329985178701079
[34m[1mwandb[0m: 	lr: 0.0007613918483234356
[34m[1mwandb[0m: 	n_update: 29
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: n8lydraz with config:
[34m[1mwandb[0m: 	batch_size: 2466
[34m[1mwandb[0m: 	clip: 0.4941539937140693
[34m[1mwandb[0m: 	gamma: 0.6751793597558634
[34m[1mwandb[0m: 	lr: 0.0005229021485052564
[34m[1mwandb[0m: 	n_update: 5
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.36


[34m[1mwandb[0m: Agent Starting Run: 10oevi0k with config:
[34m[1mwandb[0m: 	batch_size: 750
[34m[1mwandb[0m: 	clip: 0.1596692185918161
[34m[1mwandb[0m: 	gamma: 0.24168140490627
[34m[1mwandb[0m: 	lr: 0.00028969710171747006
[34m[1mwandb[0m: 	n_update: 42
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 5nqytoml with config:
[34m[1mwandb[0m: 	batch_size: 1709
[34m[1mwandb[0m: 	clip: 0.13148222515391517
[34m[1mwandb[0m: 	gamma: 0.3267187069558272
[34m[1mwandb[0m: 	lr: 0.0008040001477974224
[34m[1mwandb[0m: 	n_update: 14
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: i30ka2ht with config:
[34m[1mwandb[0m: 	batch_size: 3980
[34m[1mwandb[0m: 	clip: 0.43005428912983024
[34m[1mwandb[0m: 	gamma: 0.6838922441741873
[34m[1mwandb[0m: 	lr: 0.0002777397221021639
[34m[1mwandb[0m: 	n_update: 50
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 0nj3ze17 with config:
[34m[1mwandb[0m: 	batch_size: 3737
[34m[1mwandb[0m: 	clip: 0.4751131207862811
[34m[1mwandb[0m: 	gamma: 0.5506095417434237
[34m[1mwandb[0m: 	lr: 0.0008041657433731851
[34m[1mwandb[0m: 	n_update: 26
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: ymk7h9wa with config:
[34m[1mwandb[0m: 	batch_size: 1070
[34m[1mwandb[0m: 	clip: 0.4378535437375156
[34m[1mwandb[0m: 	gamma: 0.5244599885648172
[34m[1mwandb[0m: 	lr: 0.00019165220406105885
[34m[1mwandb[0m: 	n_update: 42
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 1k5s450d with config:
[34m[1mwandb[0m: 	batch_size: 3671
[34m[1mwandb[0m: 	clip: 0.6555898889266768
[34m[1mwandb[0m: 	gamma: 0.8006187918986267
[34m[1mwandb[0m: 	lr: 0.000754762370080422
[34m[1mwandb[0m: 	n_update: 41
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: 6enkdoqj with config:
[34m[1mwandb[0m: 	batch_size: 1954
[34m[1mwandb[0m: 	clip: 0.3282105005556395
[34m[1mwandb[0m: 	gamma: 0.2147481537725645
[34m[1mwandb[0m: 	lr: 0.0006033922960465377
[34m[1mwandb[0m: 	n_update: 49
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9nytz6im with config:
[34m[1mwandb[0m: 	batch_size: 1877
[34m[1mwandb[0m: 	clip: 0.7268106567104317
[34m[1mwandb[0m: 	gamma: 0.28087226215520555
[34m[1mwandb[0m: 	lr: 0.00025083120582576737
[34m[1mwandb[0m: 	n_update: 43
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: aa3zhztp with config:
[34m[1mwandb[0m: 	batch_size: 2333
[34m[1mwandb[0m: 	clip: 0.3086999716554639
[34m[1mwandb[0m: 	gamma: 0.8228390572896283
[34m[1mwandb[0m: 	lr: 0.0004492424972017131
[34m[1mwandb[0m: 	n_update: 16
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0


[34m[1mwandb[0m: Agent Starting Run: tygdg9ex with config:
[34m[1mwandb[0m: 	batch_size: 1662
[34m[1mwandb[0m: 	clip: 0.654635520551347
[34m[1mwandb[0m: 	gamma: 0.9183899328699668
[34m[1mwandb[0m: 	lr: 0.0001183988904116838
[34m[1mwandb[0m: 	n_update: 29
[34m[1mwandb[0m: 	training_step: 100000


Episode reward: -100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
reward,▁

0,1
reward,-100.0
