In [1]:
import gym
import time
import random
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

from tf_dqn_agent import Agent

In [2]:
# Build the LunarLander environment and seed the random sources this notebook
# has in scope, so a fresh "Restart Kernel -> Run All" starts from the same state.
SEED = 42
env = gym.make('LunarLander-v2')
env.seed(SEED)
# env.seed() only seeds the environment itself. Also seed the global `random`
# and NumPy generators imported in the first cell.
# NOTE(review): whether tf_dqn_agent draws from these generators (or needs a
# TensorFlow seed as well) is not visible from this notebook - confirm.
random.seed(SEED)
np.random.seed(SEED)
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

State shape:  (8,)
Number of actions:  4


In [3]:
# Start a fresh episode and print the first observation returned by env.reset().
state = env.reset()
print(state)

[-0.00499964  1.4194578  -0.506422    0.37943238  0.00580009  0.11471219
  0.          0.        ]


In [5]:
# Size the agent from the environment's spaces rather than hard-coding 8 and 4,
# so the cell stays correct if the environment is swapped.
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = Agent(state_size=state_size, action_size=action_size)

# watch an untrained agent
state = env.reset()
try:
    for j in range(200):
        # The agent is fed a single-row batch: shape (1, state_size).
        state = np.reshape(state, [1, state_size])
        action = agent.act(state)
        env.render()
        time.sleep(0.01)  # slow the rollout down enough to watch it
        state, reward, done, _ = env.step(action)
        if done:
            break
finally:
    # Always release the render window, even if a step/render call raises
    # or the kernel is interrupted mid-rollout.
    env.close()

In [6]:
# Train the agent: run episodes against `env`, hand every transition to
# agent.step(), and periodically call agent.replay().
episodes = 5000
max_steps = 1000        # upper bound on steps taken in a single episode
replay_interval = 64    # call agent.replay() on every 64th step of an episode
min_memory_size = 64    # ...but only once the memory holds at least this many items
                        # NOTE(review): presumably the agent's batch size - confirm.
state_size = env.observation_space.shape[0]

# Total reward of every episode, in order, kept for later analysis/plotting
# (previously the score was only printed and then discarded).
episode_scores = []

# NOTE(review): range(episodes + 1) runs episodes + 1 episodes (labels 0..episodes).
# Kept as in the original; confirm whether exactly `episodes` runs were intended.
for e in range(episodes + 1):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    score = 0
    for t in range(max_steps):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        agent.step(state, action, reward, next_state, done)
        state = next_state
        score += reward
        if done:
            print("episode: {}/{}, score: {}, after time: {}".format(e, episodes, score, t))
            break

        # The step counter `t` restarts every episode, so the replay cadence is
        # per-episode; the check is skipped on the terminal step because of the
        # `break` above.
        if (t + 1) % replay_interval == 0 and len(agent.memory) >= min_memory_size:
            agent.replay()

    episode_scores.append(score)

episode: 0/5000, score: -138.38102927999813, after time: 67
episode: 1/5000, score: -357.6868016174652, after time: 102
episode: 2/5000, score: -88.91393628566242, after time: 73
episode: 3/5000, score: -240.2722115212057, after time: 93
episode: 4/5000, score: -282.4938859548379, after time: 95
episode: 5/5000, score: 12.460029496018223, after time: 129
episode: 6/5000, score: -440.5423040471584, after time: 114
episode: 7/5000, score: -245.67792303474874, after time: 122
episode: 8/5000, score: -108.73492896595694, after time: 77
episode: 9/5000, score: -180.46231253246845, after time: 127
episode: 10/5000, score: -130.365969753143, after time: 111
episode: 11/5000, score: -135.0695685696747, after time: 68
episode: 12/5000, score: -51.70968332867484, after time: 59
episode: 13/5000, score: -412.75376413083524, after time: 88
episode: 14/5000, score: -26.57029119866594, after time: 122
episode: 15/5000, score: -583.1485222457094, after time: 115
episode: 16/5000, score: -375.53080821

episode: 134/5000, score: -29.987708327401776, after time: 152
episode: 135/5000, score: -16.4409585790727, after time: 110
episode: 136/5000, score: -136.30263198187689, after time: 90
episode: 137/5000, score: -322.8336292575861, after time: 372
episode: 138/5000, score: -111.17062955996684, after time: 233
episode: 139/5000, score: -165.25886185666616, after time: 528
episode: 140/5000, score: -61.5537900554436, after time: 263
episode: 141/5000, score: -59.69178258334685, after time: 253
episode: 142/5000, score: -222.42408337389713, after time: 159
episode: 143/5000, score: -38.64341905300219, after time: 240
episode: 144/5000, score: -37.03853665398201, after time: 389
episode: 145/5000, score: -143.3033508935281, after time: 158
episode: 146/5000, score: -12.743476286874522, after time: 999
episode: 147/5000, score: -77.4505565141522, after time: 999
episode: 148/5000, score: -127.96322436071726, after time: 999
episode: 149/5000, score: -244.0374888331664, after time: 512
episo

episode: 266/5000, score: -35.37350324533241, after time: 999
episode: 267/5000, score: -69.94327361446331, after time: 999
episode: 268/5000, score: -37.810902042400386, after time: 999
episode: 269/5000, score: -129.2396713597007, after time: 273
episode: 270/5000, score: -93.96411726325317, after time: 999
episode: 271/5000, score: -78.10361536640754, after time: 999
episode: 272/5000, score: -176.85926222427824, after time: 555
episode: 273/5000, score: -151.73064808744545, after time: 999
episode: 274/5000, score: -109.90365695918655, after time: 999
episode: 275/5000, score: -191.47129833529465, after time: 324
episode: 276/5000, score: -153.81074293801527, after time: 223
episode: 277/5000, score: -246.14355597236266, after time: 984
episode: 278/5000, score: -140.70204123763673, after time: 999
episode: 279/5000, score: -242.58247262587767, after time: 721
episode: 280/5000, score: -152.46337613388081, after time: 341
episode: 281/5000, score: -166.4434688311464, after time: 90

episode: 398/5000, score: -19.383626024611274, after time: 999
episode: 399/5000, score: 81.05854491874854, after time: 999
episode: 400/5000, score: -77.55052081666079, after time: 999
episode: 401/5000, score: -10.495800083274725, after time: 999
episode: 402/5000, score: -319.95206964622764, after time: 810
episode: 403/5000, score: -213.78796599268753, after time: 132
episode: 404/5000, score: -209.30554986990037, after time: 188
episode: 405/5000, score: 101.64321425905825, after time: 865
episode: 406/5000, score: -235.25275375795337, after time: 260
episode: 407/5000, score: -99.89396468481172, after time: 999
episode: 408/5000, score: 185.80498223166626, after time: 712
episode: 409/5000, score: -232.35894432476, after time: 895
episode: 410/5000, score: -96.11287554528964, after time: 471
episode: 411/5000, score: -52.87311417819805, after time: 91
episode: 412/5000, score: -123.27707888027882, after time: 999
episode: 413/5000, score: 184.99846627462927, after time: 393
episo

episode: 531/5000, score: -119.32735121733695, after time: 70
episode: 532/5000, score: -208.76726569560225, after time: 72
episode: 533/5000, score: -64.6212502328421, after time: 76
episode: 534/5000, score: -151.60679547192183, after time: 70
episode: 535/5000, score: -237.76455600145195, after time: 118
episode: 536/5000, score: -28.06873533854663, after time: 106
episode: 537/5000, score: -257.18421398370185, after time: 142
episode: 538/5000, score: -280.6683911987453, after time: 83
episode: 539/5000, score: -209.3481656724039, after time: 94
episode: 540/5000, score: -204.1270108846816, after time: 81
episode: 541/5000, score: -258.2135024271497, after time: 77
episode: 542/5000, score: -351.4500264024326, after time: 64
episode: 543/5000, score: 3.6554081098241937, after time: 84
episode: 544/5000, score: -303.8552558103145, after time: 92
episode: 545/5000, score: -180.36251695085048, after time: 73
episode: 546/5000, score: -212.6971311493556, after time: 92
episode: 547/500

episode: 663/5000, score: -228.69736146673202, after time: 170
episode: 664/5000, score: -218.2167054887335, after time: 188
episode: 665/5000, score: -276.26145995932313, after time: 144
episode: 666/5000, score: -96.26797076235997, after time: 999
episode: 667/5000, score: -165.2330005960167, after time: 449
episode: 668/5000, score: -75.51408539736782, after time: 999
episode: 669/5000, score: -367.93054437698476, after time: 429
episode: 670/5000, score: -186.68743854262746, after time: 999
episode: 671/5000, score: -117.79563055558917, after time: 999
episode: 672/5000, score: -163.91773011910678, after time: 999
episode: 673/5000, score: -201.19960687583279, after time: 997
episode: 674/5000, score: -123.86577547186816, after time: 999
episode: 675/5000, score: -184.77863159209582, after time: 255
episode: 676/5000, score: -104.80580535306038, after time: 186
episode: 677/5000, score: -148.99960098982132, after time: 353
episode: 678/5000, score: -146.17731970611922, after time: 

episode: 795/5000, score: -9.263339367767923, after time: 999
episode: 796/5000, score: 206.87973831082567, after time: 402
episode: 797/5000, score: 168.70844896418825, after time: 975
episode: 798/5000, score: 254.49936277852953, after time: 483
episode: 799/5000, score: 290.93945893713374, after time: 348
episode: 800/5000, score: -96.76213246211816, after time: 63
episode: 801/5000, score: -64.93274956399641, after time: 86
episode: 802/5000, score: -9.573076450977268, after time: 111
episode: 803/5000, score: -28.541097152320035, after time: 63
episode: 804/5000, score: -44.4441416967876, after time: 64
episode: 805/5000, score: -29.86032842378961, after time: 80
episode: 806/5000, score: -41.15234492237832, after time: 74
episode: 807/5000, score: -12.904384539182303, after time: 70
episode: 808/5000, score: -59.49406935613588, after time: 69
episode: 809/5000, score: -15.414105397587406, after time: 78
episode: 810/5000, score: -16.54775869330578, after time: 108
episode: 811/50

episode: 928/5000, score: -96.2097699965947, after time: 263
episode: 929/5000, score: -85.28679358753888, after time: 205
episode: 930/5000, score: 182.95412040199625, after time: 446
episode: 931/5000, score: 10.14984027769431, after time: 228
episode: 932/5000, score: -63.20618170435104, after time: 357
episode: 933/5000, score: -177.9988644196814, after time: 74
episode: 934/5000, score: -122.9863499000979, after time: 218
episode: 935/5000, score: -56.733356652731004, after time: 124
episode: 936/5000, score: 289.7032747477808, after time: 217
episode: 937/5000, score: 171.46573027128755, after time: 613
episode: 938/5000, score: -8.336919456369657, after time: 167
episode: 939/5000, score: -63.61895434202949, after time: 128
episode: 940/5000, score: 32.75284918598942, after time: 999
episode: 941/5000, score: 235.67487437206285, after time: 279
episode: 942/5000, score: 44.871356809481995, after time: 232
episode: 943/5000, score: 11.326076862017729, after time: 77
episode: 944/

episode: 1060/5000, score: -50.115813342529705, after time: 183
episode: 1061/5000, score: -82.46769516709541, after time: 83
episode: 1062/5000, score: -75.85543072130508, after time: 181
episode: 1063/5000, score: -91.01636757242323, after time: 875
episode: 1064/5000, score: -205.91907073570192, after time: 139
episode: 1065/5000, score: -288.6738793031311, after time: 243
episode: 1066/5000, score: -168.13852620363787, after time: 214
episode: 1067/5000, score: -162.1297432562298, after time: 165
episode: 1068/5000, score: -365.4475633565845, after time: 272
episode: 1069/5000, score: -72.00926689950099, after time: 100
episode: 1070/5000, score: -133.98361900001584, after time: 291
episode: 1071/5000, score: -20.65643409129747, after time: 122
episode: 1072/5000, score: -125.52198780452707, after time: 113
episode: 1073/5000, score: -16.859342930893163, after time: 93
episode: 1074/5000, score: -76.2871803972545, after time: 219
episode: 1075/5000, score: -361.687896726933, after 

episode: 1190/5000, score: -71.15668425783703, after time: 999
episode: 1191/5000, score: -65.197161825774, after time: 999
episode: 1192/5000, score: -69.09686365469784, after time: 999
episode: 1193/5000, score: -197.5929649667143, after time: 505
episode: 1194/5000, score: 192.34649608320638, after time: 279
episode: 1195/5000, score: -100.7714811711806, after time: 999
episode: 1196/5000, score: -251.910912075967, after time: 710
episode: 1197/5000, score: -162.46494519314984, after time: 999
episode: 1198/5000, score: -162.56868628011563, after time: 496
episode: 1199/5000, score: -173.9009466776043, after time: 730
episode: 1200/5000, score: -97.55262067547442, after time: 234
episode: 1201/5000, score: -190.0113743133988, after time: 326
episode: 1202/5000, score: -158.77204566401554, after time: 407
episode: 1203/5000, score: 20.846541626720736, after time: 317
episode: 1204/5000, score: -107.65336042978741, after time: 413
episode: 1205/5000, score: -218.37294596709893, after 

episode: 1320/5000, score: -56.79632098146893, after time: 84
episode: 1321/5000, score: 276.58473815877494, after time: 433
episode: 1322/5000, score: -167.5380895645123, after time: 855
episode: 1323/5000, score: -95.00882708225483, after time: 116
episode: 1324/5000, score: -48.60841312521566, after time: 95
episode: 1325/5000, score: -59.784115579598236, after time: 95
episode: 1326/5000, score: -63.77128405077366, after time: 326
episode: 1327/5000, score: -111.70011502265277, after time: 119
episode: 1328/5000, score: -93.32820001182695, after time: 85
episode: 1329/5000, score: -131.21327011767033, after time: 205
episode: 1330/5000, score: 258.83115417869965, after time: 340
episode: 1331/5000, score: -29.67573752319035, after time: 101
episode: 1332/5000, score: -35.21554363476446, after time: 92
episode: 1333/5000, score: -10.378395379921926, after time: 123
episode: 1334/5000, score: -64.4735009444617, after time: 114
episode: 1335/5000, score: -224.698474915148, after time:

episode: 1451/5000, score: -43.49188015213038, after time: 108
episode: 1452/5000, score: -20.052056239892423, after time: 95
episode: 1453/5000, score: -52.4095955637288, after time: 90
episode: 1454/5000, score: -54.93126641297056, after time: 88
episode: 1455/5000, score: -100.25220511233557, after time: 75
episode: 1456/5000, score: -146.275470011455, after time: 236
episode: 1457/5000, score: -109.693465415833, after time: 140
episode: 1458/5000, score: 44.81788045152578, after time: 253
episode: 1459/5000, score: 113.69177925416973, after time: 999
episode: 1460/5000, score: -85.4688823992825, after time: 107
episode: 1461/5000, score: 32.332800835036835, after time: 107
episode: 1462/5000, score: 196.23404567362712, after time: 530
episode: 1463/5000, score: 21.610458267409044, after time: 89
episode: 1464/5000, score: 25.0134690598763, after time: 209
episode: 1465/5000, score: 202.88561327325644, after time: 265
episode: 1466/5000, score: -70.02427285062697, after time: 88
epi

episode: 1582/5000, score: -106.62823847376424, after time: 617
episode: 1583/5000, score: -134.08110845307777, after time: 999
episode: 1584/5000, score: -424.1741864937422, after time: 570
episode: 1585/5000, score: -211.36860894968459, after time: 231
episode: 1586/5000, score: -1085.6298206290528, after time: 159
episode: 1587/5000, score: -139.22686736569892, after time: 999
episode: 1588/5000, score: -198.70544190657168, after time: 999
episode: 1589/5000, score: -433.4786785337151, after time: 716
episode: 1590/5000, score: -30.33640390263126, after time: 105
episode: 1591/5000, score: -112.65867100793967, after time: 999
episode: 1592/5000, score: -214.19258408106637, after time: 225
episode: 1593/5000, score: -40.4089004573121, after time: 270
episode: 1594/5000, score: -188.01464481947446, after time: 152
episode: 1595/5000, score: -275.0821811606211, after time: 676
episode: 1596/5000, score: -187.43154988141347, after time: 113
episode: 1597/5000, score: -296.08575212057815

episode: 1712/5000, score: -61.18655424309941, after time: 999
episode: 1713/5000, score: -167.27252984132912, after time: 388
episode: 1714/5000, score: -6.688831546789729, after time: 999
episode: 1715/5000, score: -256.4065779481968, after time: 200
episode: 1716/5000, score: -24.808764603820734, after time: 117
episode: 1717/5000, score: -94.28657228218675, after time: 101
episode: 1718/5000, score: -166.99138257968474, after time: 122
episode: 1719/5000, score: -113.1637286850519, after time: 112
episode: 1720/5000, score: 139.54835389158646, after time: 938
episode: 1721/5000, score: -61.14310418250899, after time: 203
episode: 1722/5000, score: -43.63260039984529, after time: 999
episode: 1723/5000, score: 12.709972778429417, after time: 120
episode: 1724/5000, score: -44.40833807437189, after time: 90
episode: 1725/5000, score: 259.6315129226905, after time: 794
episode: 1726/5000, score: -172.9771456374536, after time: 115
episode: 1727/5000, score: -105.42313957479828, after 

episode: 1842/5000, score: -460.5809891247507, after time: 262
episode: 1843/5000, score: -344.9748715008204, after time: 97
episode: 1844/5000, score: -15.455894776863417, after time: 153
episode: 1845/5000, score: 213.3707549317539, after time: 438
episode: 1846/5000, score: -31.532975927158745, after time: 406
episode: 1847/5000, score: 260.8924644003638, after time: 382
episode: 1848/5000, score: -73.6731105864206, after time: 999
episode: 1849/5000, score: 252.72618194256415, after time: 432
episode: 1850/5000, score: 299.2990895968694, after time: 265
episode: 1851/5000, score: -140.0561213524214, after time: 923
episode: 1852/5000, score: 290.49897562285105, after time: 429
episode: 1853/5000, score: 304.0469475703671, after time: 295
episode: 1854/5000, score: 264.66812692820326, after time: 402
episode: 1855/5000, score: -83.08055359968938, after time: 999
episode: 1856/5000, score: -38.22232496096113, after time: 999
episode: 1857/5000, score: 198.2630744631578, after time: 9

episode: 1972/5000, score: 55.66290823379609, after time: 276
episode: 1973/5000, score: 209.22678882673202, after time: 798
episode: 1974/5000, score: 37.757545819966396, after time: 142
episode: 1975/5000, score: 16.848418651796393, after time: 74
episode: 1976/5000, score: -41.781743587003035, after time: 89
episode: 1977/5000, score: 260.4440770081016, after time: 337
episode: 1978/5000, score: 24.81691507043689, after time: 111
episode: 1979/5000, score: 14.424761416948456, after time: 143
episode: 1980/5000, score: -57.299295464928264, after time: 90
episode: 1981/5000, score: -137.47671927407956, after time: 109
episode: 1982/5000, score: -40.06238397971131, after time: 123
episode: 1983/5000, score: -92.39168629913947, after time: 78
episode: 1984/5000, score: -203.6606262001821, after time: 438
episode: 1985/5000, score: -78.41678408848276, after time: 85
episode: 1986/5000, score: 18.481539745641925, after time: 93
episode: 1987/5000, score: 3.4889667937179496, after time: 11

episode: 2103/5000, score: -398.5083568941028, after time: 174
episode: 2104/5000, score: -164.77720763744986, after time: 233
episode: 2105/5000, score: -193.0970473776661, after time: 177
episode: 2106/5000, score: 178.0024619226716, after time: 679
episode: 2107/5000, score: 247.5576647832194, after time: 382
episode: 2108/5000, score: -225.09874590415984, after time: 223
episode: 2109/5000, score: -265.0489358835442, after time: 103
episode: 2110/5000, score: -401.64124320004356, after time: 133
episode: 2111/5000, score: 186.65442109500606, after time: 652
episode: 2112/5000, score: 41.748157207356854, after time: 138
episode: 2113/5000, score: 269.40246228157025, after time: 624
episode: 2114/5000, score: 268.0418237239743, after time: 267
episode: 2115/5000, score: -35.42591438143578, after time: 999
episode: 2116/5000, score: 223.94031045276836, after time: 309
episode: 2117/5000, score: -30.75181039816996, after time: 999
episode: 2118/5000, score: 236.4467434471941, after tim

episode: 2234/5000, score: -8.118580083827894, after time: 115
episode: 2235/5000, score: 32.558835110159634, after time: 104
episode: 2236/5000, score: 5.372668663669799, after time: 150
episode: 2237/5000, score: 35.94837963038543, after time: 162
episode: 2238/5000, score: 241.64849868921246, after time: 432
episode: 2239/5000, score: -176.5114748893035, after time: 119
episode: 2240/5000, score: 216.2585878431301, after time: 473
episode: 2241/5000, score: -6.981733339456525, after time: 102
episode: 2242/5000, score: 162.86645217164602, after time: 561
episode: 2243/5000, score: 204.03753135869317, after time: 564
episode: 2244/5000, score: -259.32399368371324, after time: 89
episode: 2245/5000, score: -372.7238290175569, after time: 123
episode: 2246/5000, score: 214.94748972871275, after time: 529
episode: 2247/5000, score: 241.61026661005496, after time: 480
episode: 2248/5000, score: -19.974359208730817, after time: 176
episode: 2249/5000, score: 164.32084482968907, after time

episode: 2365/5000, score: -36.14901969360501, after time: 415
episode: 2366/5000, score: 242.46074929582193, after time: 758
episode: 2367/5000, score: -104.32812347902525, after time: 87
episode: 2368/5000, score: 197.96332024585223, after time: 689
episode: 2369/5000, score: 24.407171420741406, after time: 279
episode: 2370/5000, score: 97.80124152747551, after time: 785
episode: 2371/5000, score: -65.08941843557474, after time: 158
episode: 2372/5000, score: 238.34200960955636, after time: 231
episode: 2373/5000, score: 215.54107522796988, after time: 346
episode: 2374/5000, score: 175.3635266027841, after time: 725
episode: 2375/5000, score: 9.659331797234486, after time: 303
episode: 2376/5000, score: 24.20263822822608, after time: 999
episode: 2377/5000, score: 264.1816703155864, after time: 312
episode: 2378/5000, score: 196.3374307733023, after time: 583
episode: 2379/5000, score: 239.83604126133366, after time: 504
episode: 2380/5000, score: 207.4696601004802, after time: 486

episode: 2496/5000, score: 191.6186052080317, after time: 868
episode: 2497/5000, score: -321.6437734152179, after time: 107
episode: 2498/5000, score: -23.819041308475164, after time: 512
episode: 2499/5000, score: -49.283568317917336, after time: 999
episode: 2500/5000, score: -103.27217313728454, after time: 235
episode: 2501/5000, score: -171.84632061401868, after time: 100
episode: 2502/5000, score: -114.14990748543705, after time: 999
episode: 2503/5000, score: -200.07376566965215, after time: 174
episode: 2504/5000, score: -204.35415798195035, after time: 758
episode: 2505/5000, score: -300.4285273770827, after time: 143
episode: 2506/5000, score: -100.14685911717383, after time: 999
episode: 2507/5000, score: -50.40589922003703, after time: 999
episode: 2508/5000, score: -353.0535589197858, after time: 999
episode: 2509/5000, score: 115.3992596471616, after time: 629
episode: 2510/5000, score: -232.58208299714784, after time: 419
episode: 2511/5000, score: -209.38426097942323, 

episode: 2626/5000, score: 15.988930269705278, after time: 127
episode: 2627/5000, score: -54.54484310343954, after time: 178
episode: 2628/5000, score: 28.524816560327906, after time: 240
episode: 2629/5000, score: -3.7628011038098217, after time: 999
episode: 2630/5000, score: -472.9199467762999, after time: 304
episode: 2631/5000, score: -196.2294198218291, after time: 197
episode: 2632/5000, score: 17.22440450392554, after time: 282
episode: 2633/5000, score: 210.73132802724038, after time: 736
episode: 2634/5000, score: -8.563164081864144, after time: 258
episode: 2635/5000, score: -28.38143071779703, after time: 219
episode: 2636/5000, score: -41.77759751229401, after time: 159
episode: 2637/5000, score: -222.4519096394923, after time: 217
episode: 2638/5000, score: -274.4647910012862, after time: 291
episode: 2639/5000, score: -184.5048382152949, after time: 159
episode: 2640/5000, score: -351.345986535, after time: 151
episode: 2641/5000, score: 8.027330915973653, after time: 9

episode: 2756/5000, score: 142.63933321450858, after time: 561
episode: 2757/5000, score: -62.38660360242114, after time: 200
episode: 2758/5000, score: -48.6213873601862, after time: 206
episode: 2759/5000, score: -23.987575572624237, after time: 69
episode: 2760/5000, score: -77.95194407199146, after time: 283
episode: 2761/5000, score: 16.046435597658856, after time: 126
episode: 2762/5000, score: -33.465158614379064, after time: 999
episode: 2763/5000, score: -683.5782337047345, after time: 510
episode: 2764/5000, score: -152.12010709288506, after time: 333
episode: 2765/5000, score: -96.58371409519731, after time: 364
episode: 2766/5000, score: 218.266726392473, after time: 759
episode: 2767/5000, score: -86.34196890895579, after time: 999
episode: 2768/5000, score: -87.41247972465897, after time: 451
episode: 2769/5000, score: -128.38392497487072, after time: 999
episode: 2770/5000, score: -143.25292872576534, after time: 490
episode: 2771/5000, score: 0.5608454461142003, after t

episode: 2887/5000, score: -175.65424159517383, after time: 999
episode: 2888/5000, score: -47.30147712583735, after time: 191
episode: 2889/5000, score: 3.8833442697574156, after time: 314
episode: 2890/5000, score: 42.32223513297106, after time: 151
episode: 2891/5000, score: 224.40426254215322, after time: 744
episode: 2892/5000, score: 228.45227730298984, after time: 362
episode: 2893/5000, score: 17.28045910602586, after time: 132
episode: 2894/5000, score: -51.074978543762924, after time: 99
episode: 2895/5000, score: -200.85444932208122, after time: 203
episode: 2896/5000, score: -2.791944604173821, after time: 214
episode: 2897/5000, score: -139.29403355269244, after time: 803
episode: 2898/5000, score: 56.05772363331907, after time: 999
episode: 2899/5000, score: -44.28320791005699, after time: 212
episode: 2900/5000, score: -275.2108258352428, after time: 280
episode: 2901/5000, score: -190.83599830282543, after time: 254
episode: 2902/5000, score: -180.18945345599093, after 

episode: 3017/5000, score: 154.65983302248773, after time: 761
episode: 3018/5000, score: 87.3233337260807, after time: 999
episode: 3019/5000, score: 0.6848593217035983, after time: 152
episode: 3020/5000, score: -129.1355957925156, after time: 320
episode: 3021/5000, score: -196.58244245369087, after time: 93
episode: 3022/5000, score: -46.67909268186294, after time: 93
episode: 3023/5000, score: 25.32425859356809, after time: 102
episode: 3024/5000, score: -27.266199772315318, after time: 81
episode: 3025/5000, score: -50.94277362068454, after time: 95
episode: 3026/5000, score: -0.30406207784390915, after time: 111
episode: 3027/5000, score: -35.73274125941644, after time: 103
episode: 3028/5000, score: -72.02511715770235, after time: 65
episode: 3029/5000, score: -113.49657375923182, after time: 555
episode: 3030/5000, score: -76.83701440201236, after time: 86
episode: 3031/5000, score: 35.803953371228346, after time: 201
episode: 3032/5000, score: 15.16640814215755, after time: 1

episode: 3148/5000, score: -73.97456309162509, after time: 230
episode: 3149/5000, score: -112.60440771062676, after time: 706
episode: 3150/5000, score: -123.3067577282152, after time: 999
episode: 3151/5000, score: -28.821871512992985, after time: 567
episode: 3152/5000, score: -35.89495771522158, after time: 406
episode: 3153/5000, score: -63.64001368928593, after time: 580
episode: 3154/5000, score: -57.35701668124114, after time: 131
episode: 3155/5000, score: -62.86431223608324, after time: 424
episode: 3156/5000, score: -65.66632116693305, after time: 305
episode: 3157/5000, score: -102.27679829473325, after time: 247
episode: 3158/5000, score: -5.693477331798917, after time: 516
episode: 3159/5000, score: 51.647491166642276, after time: 999
episode: 3160/5000, score: -80.10275604953205, after time: 246
episode: 3161/5000, score: 60.443846534017496, after time: 999
episode: 3162/5000, score: 84.18836169869896, after time: 999
episode: 3163/5000, score: -208.84597577331152, after

episode: 3278/5000, score: -4.297536302032384, after time: 102
episode: 3279/5000, score: 256.6469262336885, after time: 184
episode: 3280/5000, score: -77.67022263939705, after time: 93
episode: 3281/5000, score: -3.270894021381096, after time: 105
episode: 3282/5000, score: -151.37413138416065, after time: 143
episode: 3283/5000, score: -315.95571698066976, after time: 172
episode: 3284/5000, score: -294.13643549385216, after time: 160
episode: 3285/5000, score: 33.427831646909056, after time: 110
episode: 3286/5000, score: -428.8973502607311, after time: 133
episode: 3287/5000, score: -482.2378043259044, after time: 123
episode: 3288/5000, score: -219.32662217642104, after time: 125
episode: 3289/5000, score: -284.31468759838003, after time: 248
episode: 3290/5000, score: -564.7771834149462, after time: 94
episode: 3291/5000, score: -685.1540662331399, after time: 110
episode: 3292/5000, score: -390.2086542596376, after time: 79
episode: 3293/5000, score: -566.7031336676456, after t

episode: 3408/5000, score: 25.86553349389459, after time: 125
episode: 3409/5000, score: 221.43334813612518, after time: 385
episode: 3410/5000, score: 234.5539733581123, after time: 572
episode: 3411/5000, score: -140.08326954572016, after time: 60
episode: 3412/5000, score: -175.110456419424, after time: 77
episode: 3413/5000, score: -127.58333788772484, after time: 76
episode: 3414/5000, score: -117.42499436820562, after time: 217
episode: 3415/5000, score: -61.33417205519868, after time: 259
episode: 3416/5000, score: -175.68913021373328, after time: 999
episode: 3417/5000, score: -256.18045695395585, after time: 523
episode: 3418/5000, score: -654.4778386864501, after time: 678
episode: 3419/5000, score: -129.85448061032176, after time: 999
episode: 3420/5000, score: -14.716360781401576, after time: 383
episode: 3421/5000, score: 111.637108532783, after time: 999
episode: 3422/5000, score: 280.59608132531463, after time: 401
episode: 3423/5000, score: -425.1625919390123, after tim

episode: 3539/5000, score: -19.485324552444297, after time: 95
episode: 3540/5000, score: -48.763322169578345, after time: 76
episode: 3541/5000, score: -414.24250796639683, after time: 99
episode: 3542/5000, score: -271.6568841495984, after time: 69
episode: 3543/5000, score: 51.64925623773691, after time: 999
episode: 3544/5000, score: -249.47081866675927, after time: 73
episode: 3545/5000, score: -553.3231895616377, after time: 611
episode: 3546/5000, score: -154.5496145919961, after time: 175
episode: 3547/5000, score: -607.7868462329944, after time: 129
episode: 3548/5000, score: -605.0928627183985, after time: 230
episode: 3549/5000, score: 261.1672902890476, after time: 391
episode: 3550/5000, score: -173.23912505766629, after time: 60
episode: 3551/5000, score: -394.15320797626725, after time: 213
episode: 3552/5000, score: -305.74548765368957, after time: 121
episode: 3553/5000, score: -225.36706497593096, after time: 238
episode: 3554/5000, score: -361.9871891497119, after ti

episode: 3669/5000, score: 68.24641574879489, after time: 764
episode: 3670/5000, score: 22.514682506235143, after time: 999
episode: 3671/5000, score: 11.909679059137801, after time: 311
episode: 3672/5000, score: 245.78274921966891, after time: 286
episode: 3673/5000, score: 7.456168074445017, after time: 122
episode: 3674/5000, score: 232.43332719136453, after time: 548
episode: 3675/5000, score: 264.1507211587841, after time: 220
episode: 3676/5000, score: -9.844657256334187, after time: 321
episode: 3677/5000, score: 154.59405384249635, after time: 829
episode: 3678/5000, score: 224.91977944668844, after time: 999
episode: 3679/5000, score: 252.47697966260802, after time: 409
episode: 3680/5000, score: 174.44149190550814, after time: 552
episode: 3681/5000, score: -91.42042343742327, after time: 157
episode: 3682/5000, score: 175.76976689525918, after time: 519
episode: 3683/5000, score: -204.04071628135137, after time: 219
episode: 3684/5000, score: 200.23863379234007, after time

episode: 3799/5000, score: -51.98020879172426, after time: 274
episode: 3800/5000, score: -6.15635963775803, after time: 217
episode: 3801/5000, score: -204.60684520693513, after time: 447
episode: 3802/5000, score: -1.5733475992764028, after time: 118
episode: 3803/5000, score: 6.564546118974022, after time: 105
episode: 3804/5000, score: 250.97982417104532, after time: 568
episode: 3805/5000, score: -20.38911871619365, after time: 228
episode: 3806/5000, score: 165.74812293649887, after time: 920
episode: 3807/5000, score: 12.238822609347885, after time: 999
episode: 3808/5000, score: -190.01695411029067, after time: 639
episode: 3809/5000, score: -72.51471380167939, after time: 999
episode: 3810/5000, score: -1048.3544446120777, after time: 521
episode: 3811/5000, score: 226.48508891671767, after time: 714
episode: 3812/5000, score: -30.088218324670336, after time: 999
episode: 3813/5000, score: 81.77913700088982, after time: 999
episode: 3814/5000, score: 238.13990751340947, after 

episode: 3929/5000, score: -122.64777522244057, after time: 213
episode: 3930/5000, score: 91.13185925895755, after time: 999
episode: 3931/5000, score: -347.06846358035534, after time: 776
episode: 3932/5000, score: 238.16653410065172, after time: 435
episode: 3933/5000, score: 163.6600078331469, after time: 801
episode: 3934/5000, score: -183.42626551298218, after time: 182
episode: 3935/5000, score: -155.14722504187333, after time: 152
episode: 3936/5000, score: -100.61566083452904, after time: 184
episode: 3937/5000, score: 193.8905645660938, after time: 504
episode: 3938/5000, score: -58.89955648776897, after time: 150
episode: 3939/5000, score: -92.06233828288076, after time: 140
episode: 3940/5000, score: -136.3306473850574, after time: 185
episode: 3941/5000, score: -94.39683858952174, after time: 150
episode: 3942/5000, score: -113.48252900168367, after time: 126
episode: 3943/5000, score: 240.4479326834566, after time: 352
episode: 3944/5000, score: -156.30333161761354, after

episode: 4059/5000, score: 102.29213854421597, after time: 573
episode: 4060/5000, score: -135.2076072918987, after time: 169
episode: 4061/5000, score: 180.5270043578886, after time: 665
episode: 4062/5000, score: 192.47071626584471, after time: 322
episode: 4063/5000, score: -35.6340145968901, after time: 152
episode: 4064/5000, score: 234.74138986047353, after time: 501
episode: 4065/5000, score: 30.42929932780632, after time: 175
episode: 4066/5000, score: 178.0922832684264, after time: 721
episode: 4067/5000, score: -256.9841680056619, after time: 273
episode: 4068/5000, score: -287.5603308502896, after time: 205
episode: 4069/5000, score: -202.80635977777763, after time: 466
episode: 4070/5000, score: -267.0908341096937, after time: 201
episode: 4071/5000, score: 192.25836028815854, after time: 573
episode: 4072/5000, score: 110.622081263725, after time: 728
episode: 4073/5000, score: 194.2773976091109, after time: 441
episode: 4074/5000, score: -280.6312992131478, after time: 24

episode: 4189/5000, score: -227.01394444168403, after time: 999
episode: 4190/5000, score: -135.60992498624492, after time: 999
episode: 4191/5000, score: -132.9641025959767, after time: 999
episode: 4192/5000, score: -603.8925545096529, after time: 297
episode: 4193/5000, score: 197.373431744864, after time: 450
episode: 4194/5000, score: -189.73954053118283, after time: 219
episode: 4195/5000, score: -278.5848262042367, after time: 487
episode: 4196/5000, score: -184.01193670857708, after time: 439
episode: 4197/5000, score: -218.65139945034093, after time: 378
episode: 4198/5000, score: -177.0788275692992, after time: 999
episode: 4199/5000, score: -673.8609001591899, after time: 328
episode: 4200/5000, score: -1077.7117187743702, after time: 375
episode: 4201/5000, score: -332.30098781523316, after time: 336
episode: 4202/5000, score: -436.57030837270287, after time: 106
episode: 4203/5000, score: -470.6301531203002, after time: 221
episode: 4204/5000, score: -801.6413114409302, af

episode: 4319/5000, score: -131.4403159291346, after time: 95
episode: 4320/5000, score: -141.21081295899882, after time: 135
episode: 4321/5000, score: -166.93990361852184, after time: 81
episode: 4322/5000, score: -166.61858539089394, after time: 106
episode: 4323/5000, score: -196.31803703407178, after time: 130
episode: 4324/5000, score: -67.24275982018985, after time: 115
episode: 4325/5000, score: -106.8397366051486, after time: 146
episode: 4326/5000, score: -49.70709913857735, after time: 90
episode: 4327/5000, score: -248.66412052377683, after time: 261
episode: 4328/5000, score: -209.27770694100667, after time: 113
episode: 4329/5000, score: -113.8113881763622, after time: 98
episode: 4330/5000, score: -98.87977354761026, after time: 91
episode: 4331/5000, score: -90.19315547262161, after time: 78
episode: 4332/5000, score: 225.3400405989848, after time: 381
episode: 4333/5000, score: -29.004905563190235, after time: 105
episode: 4334/5000, score: -101.74083842161225, after t

episode: 4449/5000, score: -216.1220066576567, after time: 182
episode: 4450/5000, score: -107.0649368363492, after time: 158
episode: 4451/5000, score: -176.4063843069901, after time: 666
episode: 4452/5000, score: -156.4324576664053, after time: 393
episode: 4453/5000, score: -104.56693821331561, after time: 850
episode: 4454/5000, score: -55.75559648493703, after time: 526
episode: 4455/5000, score: 246.80204933128385, after time: 351
episode: 4456/5000, score: -62.77298326653339, after time: 123
episode: 4457/5000, score: -5.527755842728666, after time: 115
episode: 4458/5000, score: -148.9290469384898, after time: 231
episode: 4459/5000, score: -149.2006314746722, after time: 96
episode: 4460/5000, score: -146.46114246909576, after time: 68
episode: 4461/5000, score: 204.329630365473, after time: 800
episode: 4462/5000, score: 238.92817402878248, after time: 631
episode: 4463/5000, score: 42.241296809618575, after time: 78
episode: 4464/5000, score: 27.33863923177671, after time: 

episode: 4579/5000, score: -247.44920069239004, after time: 279
episode: 4580/5000, score: -173.13827351900724, after time: 130
episode: 4581/5000, score: -177.26938401799342, after time: 99
episode: 4582/5000, score: 20.67211119313272, after time: 91
episode: 4583/5000, score: -292.93167332795616, after time: 106
episode: 4584/5000, score: -194.57447302698966, after time: 119
episode: 4585/5000, score: -257.13542624693207, after time: 132
episode: 4586/5000, score: -109.94094525586877, after time: 55
episode: 4587/5000, score: -33.88374683527681, after time: 82
episode: 4588/5000, score: -132.85965612047642, after time: 74
episode: 4589/5000, score: -104.83513330239033, after time: 78
episode: 4590/5000, score: -227.3566218045575, after time: 80
episode: 4591/5000, score: -287.5384999123232, after time: 73
episode: 4592/5000, score: -177.7765503846075, after time: 70
episode: 4593/5000, score: -304.9313052070953, after time: 71
episode: 4594/5000, score: -248.5630260394156, after time

episode: 4709/5000, score: -117.87747252396841, after time: 159
episode: 4710/5000, score: -75.54410982708731, after time: 309
episode: 4711/5000, score: -117.1526464888072, after time: 266
episode: 4712/5000, score: -377.84597571433665, after time: 106
episode: 4713/5000, score: -226.77308686071936, after time: 101
episode: 4714/5000, score: -185.34910452335367, after time: 94
episode: 4715/5000, score: 70.4696114691993, after time: 999
episode: 4716/5000, score: -127.12126867651787, after time: 285
episode: 4717/5000, score: -132.8182105117351, after time: 212
episode: 4718/5000, score: -240.30227937994917, after time: 123
episode: 4719/5000, score: -144.0094538731671, after time: 364
episode: 4720/5000, score: -310.9260049202192, after time: 808
episode: 4721/5000, score: -230.70038980669133, after time: 145
episode: 4722/5000, score: -155.10378033718482, after time: 327
episode: 4723/5000, score: -211.70379539301632, after time: 146
episode: 4724/5000, score: -131.79107251215737, a

episode: 4839/5000, score: 6.003407835072736, after time: 113
episode: 4840/5000, score: -133.24189980851105, after time: 71
episode: 4841/5000, score: -173.06786949369337, after time: 199
episode: 4842/5000, score: -225.0136526181102, after time: 80
episode: 4843/5000, score: -333.5023519897386, after time: 79
episode: 4844/5000, score: -359.42250097143346, after time: 93
episode: 4845/5000, score: -258.4820855320617, after time: 73
episode: 4846/5000, score: -235.5880216349652, after time: 83
episode: 4847/5000, score: -110.16898306951097, after time: 76
episode: 4848/5000, score: -189.6837336059171, after time: 89
episode: 4849/5000, score: -206.13103982608354, after time: 77
episode: 4850/5000, score: -224.01628269510678, after time: 76
episode: 4851/5000, score: -179.56843276148686, after time: 97
episode: 4852/5000, score: -275.77707381783, after time: 114
episode: 4853/5000, score: -270.8364912037399, after time: 75
episode: 4854/5000, score: -209.3607988291571, after time: 126


episode: 4969/5000, score: -129.24521584037132, after time: 81
episode: 4970/5000, score: -102.57282218798507, after time: 90
episode: 4971/5000, score: -209.30975124453494, after time: 160
episode: 4972/5000, score: -299.777315608288, after time: 346
episode: 4973/5000, score: -81.18975514336415, after time: 401
episode: 4974/5000, score: -238.36161720807752, after time: 164
episode: 4975/5000, score: -37.23922027873824, after time: 137
episode: 4976/5000, score: 21.367804521146752, after time: 126
episode: 4977/5000, score: -53.57247110649533, after time: 593
episode: 4978/5000, score: -167.44116793125292, after time: 154
episode: 4979/5000, score: -154.74811513012804, after time: 102
episode: 4980/5000, score: -57.41405421669106, after time: 433
episode: 4981/5000, score: -205.43306169972422, after time: 75
episode: 4982/5000, score: -292.91841499178724, after time: 399
episode: 4983/5000, score: -332.21878555675636, after time: 88
episode: 4984/5000, score: -126.38719986395618, aft

In [27]:
# Roll out one rendered episode with the current agent, capped at 300 steps.
# The rollout is wrapped in try/finally so the environment (and its render
# window) is closed even if the kernel is interrupted or a call raises.
state = env.reset()
try:
    for step_index in range(300):
        # agent.act is given the state reshaped to (1, 8), the same way the
        # training loop feeds it.
        state = np.reshape(state, [1, 8])
        action = agent.act(state)
        env.render()
        time.sleep(0.01)  # short pause after each rendered frame
        state, reward, done, _ = env.step(action)
        if done:
            break
finally:
    env.close()

In [23]:
# Write the agent's model weights to 'weights.h5' (a relative path, so it is
# resolved against the notebook's working directory).
# NOTE(review): presumably a Keras model given the tf_dqn_agent import - confirm.
weights_path = 'weights.h5'
agent.model.save_weights(weights_path)