In [1]:
import gym
from algorithms.deeprl.ddpg.agent import DDPG
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [2]:
# Create the Lunar Lander (continuous-action variant) environment and read the
# sizes the agent constructor is given from its observation/action spaces.
env = gym.make('LunarLanderContinuous-v2')
nS, nA = env.observation_space.shape[0], env.action_space.shape[0]
# Upper bound of the first action component only.
# NOTE(review): assumes every action dimension shares this bound — TODO confirm.
action_limit = env.action_space.high[0]

In [3]:
# Sanity check: show the first observation-space and action-space dimensions.
print(f"{nS} {nA}")

8 2


In [4]:
# Build the DDPG agent for the environment created earlier in the notebook.
# The keyword arguments are grouped by purpose; values are forwarded unchanged.
# NOTE(review): the meaning of each hyperparameter is inferred from its name —
# confirm against DDPG.__init__.
agent = DDPG(
    env, nS, nA, action_limit,
    # network shape and non-linearity
    hidden_dims=(400, 300),
    activation=F.relu,
    # optimizer class and the two step sizes (presumably policy / Q-function)
    optimizer=optim.Adam,
    pi_alpha=1e-4,
    q_alpha=1e-3,
    # discounting and (presumably) target-network update rate
    gamma=0.99,
    tau=0.001,
    # replay memory capacity and minibatch size
    max_memory_size=100000,
    batch_size=64,
    # output location and run label
    dir='tmp',
    name='DDPG',
)

In [5]:
# Value handed to agent.learn() as its first argument
# (presumably the maximum number of training episodes — confirm).
n_games = 1_000

In [6]:
# Run training and keep whatever learn() returns so it can be plotted later.
# NOTE(review): the recorded run below ended in KeyboardInterrupt; when that
# happens this assignment never completes and `results` stays undefined for
# any later cell that uses it.
results = agent.learn(
    n_games,
    warmup=100,
    target_reward=200,
    log=True,
)

--------------------------------------------------------
Episode: 0
--------------------------------------------------------
Episode: 1
--------------------------------------------------------
Episode: 2
--------------------------------------------------------
Episode: 3
--------------------------------------------------------
Episode: 4
--------------------------------------------------------
Episode: 5
--------------------------------------------------------
Episode: 6
--------------------------------------------------------
Episode: 7
--------------------------------------------------------
Episode: 8
--------------------------------------------------------
Episode: 9
--------------------------------------------------------
Episode: 10
--------------------------------------------------------
Episode: 11
--------------------------------------------------------
Episode: 12
--------------------------------------------------------
Episode: 13
--------------------------------------------

--------------------------------------------------------
Episode: 106
Step: 9040
Score: -331.38709163278514
Train Mean: -729.4873577056225
Eval Score: -231.99127479769504
Eval Mean 100: -707.9233083958301
--------------------------------------------------------
--------------------------------------------------------
Episode: 107
Step: 9191
Score: -363.2802534861979
Train Mean: -725.0490197898653
Eval Score: -440.31184806029887
Eval Mean 100: -689.2182011935372
--------------------------------------------------------
--------------------------------------------------------
Episode: 108
Step: 9372
Score: -380.7319567716709
Train Mean: -719.7780539743442
Eval Score: -194.75649390613495
Eval Mean 100: -679.7045298627177
--------------------------------------------------------
--------------------------------------------------------
Episode: 109
Step: 9524
Score: -374.92981679906967
Train Mean: -716.0620043549276
Eval Score: -284.06913481380957
Eval Mean 100: -671.5046061241068
-----------

--------------------------------------------------------
Episode: 138
Step: 15641
Score: -127.58331671434803
Train Mean: -582.0154101440269
Eval Score: -133.61328914878862
Eval Mean 100: -538.466280019272
--------------------------------------------------------
--------------------------------------------------------
Episode: 139
Step: 15914
Score: -145.32094558812986
Train Mean: -576.6614992908985
Eval Score: -236.76463255141826
Eval Mean 100: -536.5144201019465
--------------------------------------------------------
--------------------------------------------------------
Episode: 140
Step: 16297
Score: -146.39208287296276
Train Mean: -571.1923462217226
Eval Score: -182.07339551591514
Eval Mean 100: -530.7606244951638
--------------------------------------------------------
--------------------------------------------------------
Episode: 141
Step: 16507
Score: -207.72976694665778
Train Mean: -568.0083920965805
Eval Score: -148.81960511107678
Eval Mean 100: -527.97279348959
--------

--------------------------------------------------------
Episode: 170
Step: 23457
Score: -283.5229575155173
Train Mean: -450.49675324644346
Eval Score: -201.52163796915966
Eval Mean 100: -423.8380881339381
--------------------------------------------------------
--------------------------------------------------------
Episode: 171
Step: 23772
Score: -223.95007403074487
Train Mean: -445.10707552307736
Eval Score: -265.8587889528253
Eval Mean 100: -422.53071911039615
--------------------------------------------------------
--------------------------------------------------------
Episode: 172
Step: 24040
Score: -179.83605102099474
Train Mean: -441.2418683149583
Eval Score: -146.1278619440344
Eval Mean 100: -419.2207897250297
--------------------------------------------------------
--------------------------------------------------------
Episode: 173
Step: 24525
Score: -268.18854916677856
Train Mean: -439.1250681695628
Eval Score: -188.59897564937404
Eval Mean 100: -416.7512106749143
-----

--------------------------------------------------------
Episode: 202
Step: 34483
Score: -198.01928412253162
Train Mean: -215.681886082971
Eval Score: -199.491478686223
Eval Mean 100: -214.76825144451323
--------------------------------------------------------
--------------------------------------------------------
Episode: 203
Step: 35103
Score: -226.63846321646741
Train Mean: -207.491394904751
Eval Score: -169.94911012706478
Eval Mean 100: -210.864216595246
--------------------------------------------------------
--------------------------------------------------------
Episode: 204
Step: 35303
Score: -178.89199878308438
Train Mean: -209.46103747833303
Eval Score: -201.45336636488756
Eval Mean 100: -212.49669770563236
--------------------------------------------------------
--------------------------------------------------------
Episode: 205
Step: 35680
Score: -144.07986245283388
Train Mean: -209.32009653582134
Eval Score: -284.4581903710702
Eval Mean 100: -213.82706353404356
------

--------------------------------------------------------
Episode: 234
Step: 47390
Score: -199.46622567057005
Train Mean: -194.1832595680235
Eval Score: -93.23877320074556
Eval Mean 100: -203.98015638269698
--------------------------------------------------------
--------------------------------------------------------
Episode: 235
Step: 47558
Score: -153.33365430636854
Train Mean: -194.1426082077733
Eval Score: -146.36602137762912
Eval Mean 100: -203.71544355812952
--------------------------------------------------------
--------------------------------------------------------
Episode: 236
Step: 47771
Score: -153.879711239739
Train Mean: -193.4628559016375
Eval Score: -161.59808031918044
Eval Mean 100: -202.93077910508526
--------------------------------------------------------
--------------------------------------------------------
Episode: 237
Step: 48172
Score: -254.3952723714481
Train Mean: -194.16112634234312
Eval Score: -160.1300619680491
Eval Mean 100: -203.15353385232
--------

--------------------------------------------------------
Episode: 266
Step: 61820
Score: -133.77919448947682
Train Mean: -191.2158633249865
Eval Score: -153.9882786001568
Eval Mean 100: -194.82848899778216
--------------------------------------------------------
--------------------------------------------------------
Episode: 267
Step: 62488
Score: -165.2133867790983
Train Mean: -191.06996449480917
Eval Score: -176.74309423039807
Eval Mean 100: -194.68974329635338
--------------------------------------------------------
--------------------------------------------------------
Episode: 268
Step: 63015
Score: -137.53167300920285
Train Mean: -190.73371725188062
Eval Score: -146.94438568503918
Eval Mean 100: -192.55187100697523
--------------------------------------------------------
--------------------------------------------------------
Episode: 269
Step: 63352
Score: -82.8172720988015
Train Mean: -189.76642440382534
Eval Score: -169.63174667881978
Eval Mean 100: -191.76821138836186
--

--------------------------------------------------------
Episode: 298
Step: 79886
Score: -186.35716860105265
Train Mean: -170.50263152289105
Eval Score: -42.9675491824623
Eval Mean 100: -169.95904260843778
--------------------------------------------------------
--------------------------------------------------------
Episode: 299
Step: 80503
Score: -165.14465099339708
Train Mean: -169.37361659787092
Eval Score: -95.07245546673742
Eval Mean 100: -169.64016787686313
--------------------------------------------------------
--------------------------------------------------------
Episode: 300
Step: 81320
Score: -187.9816503521066
Train Mean: -169.38347860491086
Eval Score: -118.8427027601952
Eval Mean 100: -169.01732792445256
--------------------------------------------------------
--------------------------------------------------------
Episode: 301
Step: 81911
Score: -117.48629043167539
Train Mean: -168.40458733785965
Eval Score: -106.29034582178491
Eval Mean 100: -167.145768288362
----

--------------------------------------------------------
Episode: 330
Step: 100934
Score: -82.1798795668225
Train Mean: -151.4969892453709
Eval Score: -100.41045423002397
Eval Mean 100: -144.85708969097107
--------------------------------------------------------
--------------------------------------------------------
Episode: 331
Step: 101934
Score: -82.93525391566114
Train Mean: -150.55099066593468
Eval Score: -61.721818817191945
Eval Mean 100: -144.27613682091408
--------------------------------------------------------
--------------------------------------------------------
Episode: 332
Step: 102934
Score: -126.47977153875591
Train Mean: -150.03403645013208
Eval Score: -40.69903659649282
Eval Mean 100: -142.15495351502182
--------------------------------------------------------
--------------------------------------------------------
Episode: 333
Step: 103934
Score: -50.73504083116569
Train Mean: -148.2898504340216
Eval Score: -36.390783417166325
Eval Mean 100: -140.58369668766528


--------------------------------------------------------
Episode: 362
Step: 125293
Score: 269.84120488256855
Train Mean: -75.22358985601815
Eval Score: -127.34093005278656
Eval Mean 100: -77.15148647769495
--------------------------------------------------------
--------------------------------------------------------
Episode: 363
Step: 125660
Score: 248.43492233447336
Train Mean: -71.5939398868528
Eval Score: 237.16605866486398
Eval Mean 100: -72.9320039622491
--------------------------------------------------------
--------------------------------------------------------
Episode: 364
Step: 126266
Score: 222.4579705250888
Train Mean: -67.65639008059753
Eval Score: 221.43920179737964
Eval Mean 100: -68.19364206685862
--------------------------------------------------------
--------------------------------------------------------
Episode: 365
Step: 126772
Score: 257.2150386793003
Train Mean: -63.47796916666427
Eval Score: 237.17257103076776
Eval Mean 100: -65.07536455765234
------------

--------------------------------------------------------
Episode: 394
Step: 139626
Score: 46.94858006550811
Train Mean: 22.383779563375334
Eval Score: 19.853209539039824
Eval Mean 100: 25.76798702527121
--------------------------------------------------------
--------------------------------------------------------
Episode: 395
Step: 140127
Score: -24.137864698816003
Train Mean: 23.01221636381794
Eval Score: 189.1828551293791
Eval Mean 100: 28.274612212817548
--------------------------------------------------------
--------------------------------------------------------
Episode: 396
Step: 140511
Score: 226.2918283950414
Train Mean: 26.030273083303854
Eval Score: 261.21130668201124
Eval Mean 100: 31.8653894572443
--------------------------------------------------------
--------------------------------------------------------
Episode: 397
Step: 141260
Score: 215.61128139339672
Train Mean: 29.4902286199125
Eval Score: 18.29628405691298
Eval Mean 100: 33.75594264478088
-------------------

KeyboardInterrupt: 

In [None]:
# Plot the value returned by agent.learn() on an explicit figure/axes pair so
# the figure carries a title and the cell does not echo a Line2D repr.
# `results` only exists if the training cell ran to completion.
# NOTE(review): the structure of `results` is not visible from this notebook,
# so the axes are left unlabeled — TODO add x/y labels once confirmed.
fig, ax = plt.subplots()
ax.plot(results)
ax.set_title('DDPG on LunarLanderContinuous-v2')
plt.show()