In [1]:
# Source: https://github.com/keras-rl/keras-rl

# Normalized Advantage Function (NAF) agents are a way of extending DQN to continuous action spaces.
# In NAF, the Q-function is decomposed into an advantage term A and a state-value term V.

# Requirements:
# pip install tensorflow
# pip install gym
# pip install keras-rl2

import numpy as np
import gym

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.optimizers import Adam

from rl.agents import NAFAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from rl.core import Processor

In [2]:
# Hyperparameters and run settings shared by the cells below.

# Observation window: used as the leading input dimension of the networks
# and as window_length of the replay memory.
num_frames = 1

# Ornstein-Uhlenbeck process settings (passed as theta / mu / sigma).
theta = 0.15
mu = 0.0
sigma = 0.3

# Optimizer and agent settings.
learning_rate = 1e-3           # passed to Adam
gamma = 0.99                   # passed to NAFAgent(gamma=...)
target_model_update = 0.001    # passed to NAFAgent(target_model_update=...)

# Replay memory capacity (SequentialMemory limit).
memory_limit = 10 ** 5

# Training length, used as nb_steps in the (commented-out) fit call.
time_steps = 10 ** 4

# NOTE(review): despite its name, this value is passed as
# nb_max_episode_steps in the (commented-out) fit call, i.e. it is a
# per-episode step cap rather than a number of episodes.
episodes = 2

In [3]:
# Create the CarRacing environment.
env = gym.make('CarRacing-v0')

# Length of the action vector, read from the first axis of the action space shape.
num_actions = env.action_space.shape[0]

# Symbolic Keras inputs: one for an action vector, one for a window of
# num_frames stacked observations.
action_input = Input(name='action_input', shape=(num_actions,))
observation_input = Input(
    name='observation_input',
    shape=(num_frames,) + env.observation_space.shape,
)

# Show both input shapes, one per line.
print(action_input.shape, observation_input.shape, sep='\n')

(None, 3)
(None, 1, 96, 96, 3)




In [4]:
# Building a V-model to learn the state value term
def build_V_model(environment, summary=False):
    """Build the (uncompiled) state-value network V.

    Architecture: Flatten -> 3 x [Dense(16) -> ReLU] -> Dense(1) -> linear.

    Args:
        environment: object exposing ``observation_space.shape``; together with
            the module-level ``num_frames`` it defines the input shape
            ``(num_frames,) + observation_space.shape``.
        summary: when True, print the Keras model summary.

    Returns:
        The Keras ``Sequential`` model.
    """
    V_model = Sequential()
    V_model.add(Flatten(input_shape=(num_frames,) + environment.observation_space.shape))
    # Three identical hidden stages.
    for _ in range(3):
        V_model.add(Dense(16))
        V_model.add(Activation('relu'))
    # Single linear output unit.
    V_model.add(Dense(1))
    V_model.add(Activation('linear'))
    if summary:
        # summary() prints the table itself and returns None, so it must not be
        # wrapped in print() (that would emit a stray "None" line).
        V_model.summary()
    return V_model


# Building the mu-model. The mu-model's output is always the action that maximizes the Q-function.
# Because the mu-model chooses the action deterministically,
# a random_process is added (by the agent) to balance exploration and exploitation.
def build_mu_model(environment, summary=False):
    """Build the (uncompiled) action network mu.

    Architecture: Flatten -> 3 x [Dense(16) -> ReLU] -> Dense(num_actions) -> linear.

    Args:
        environment: object exposing ``observation_space.shape``; together with
            the module-level ``num_frames`` it defines the input shape
            ``(num_frames,) + observation_space.shape``.
        summary: when True, print the Keras model summary.

    Returns:
        The Keras ``Sequential`` model with ``num_actions`` linear outputs
        (``num_actions`` is read from module scope).
    """
    mu_model = Sequential()
    # Use the `environment` argument, not the notebook-global `env`, so the
    # function honours its parameter and works without a global `env`.
    mu_model.add(Flatten(input_shape=(num_frames,) + environment.observation_space.shape))
    # Three identical hidden stages.
    for _ in range(3):
        mu_model.add(Dense(16))
        mu_model.add(Activation('relu'))
    # One linear output per action component.
    mu_model.add(Dense(num_actions))
    mu_model.add(Activation('linear'))
    if summary:
        # summary() prints the table itself and returns None, so it must not be
        # wrapped in print() (that would emit a stray "None" line).
        mu_model.summary()
    return mu_model


# Building an L-model to learn the advantage term A
def build_L_model(action_input, observation_input, summary=False):
    """Build the (uncompiled) L network from an action and an observation input.

    Architecture: Concatenate(action, Flatten(observation))
    -> 3 x [Dense(32) -> ReLU] -> Dense(n * (n + 1) // 2) -> linear,
    where n is the module-level ``num_actions``.

    Args:
        action_input: Keras input tensor for the action.
        observation_input: Keras input tensor for the observation window.
        summary: when True, print the Keras model summary.

    Returns:
        A Keras ``Model`` with inputs ``[action_input, observation_input]``.
    """
    x = Concatenate()([action_input, Flatten()(observation_input)])
    # Three identical hidden stages.
    for _ in range(3):
        x = Dense(32)(x)
        x = Activation('relu')(x)
    # n(n+1)/2 outputs: the entry count of a lower-triangular n x n matrix.
    # NOTE(review): presumably the L factor NAFAgent uses to form the
    # advantage term; confirm against the keras-rl NAFAgent implementation.
    x = Dense((num_actions * num_actions + num_actions) // 2)(x)
    x = Activation('linear')(x)
    L_model = Model(inputs=[action_input, observation_input], outputs=x)
    if summary:
        # summary() prints the table itself and returns None, so it must not be
        # wrapped in print() (that would emit a stray "None" line).
        L_model.summary()
    return L_model



In [5]:
# Instantiate the three NAF sub-networks, printing each architecture.
V_model = build_V_model(environment=env, summary=True)
mu_model = build_mu_model(environment=env, summary=True)
L_model = build_L_model(
    action_input=action_input,
    observation_input=observation_input,
    summary=True,
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 27648)             0         
_________________________________________________________________
dense (Dense)                (None, 16)                442384    
_________________________________________________________________
activation (Activation)      (None, 16)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_1 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_2 (Activation)    (None, 16)                0

In [6]:
# Configuring and compiling the Agent
memory = SequentialMemory(limit=memory_limit, window_length=num_frames)
random_process = OrnsteinUhlenbeckProcess(theta=theta, mu=mu, sigma=sigma, size=num_actions)
agent = NAFAgent(nb_actions=num_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                 memory=memory, nb_steps_warmup=100, random_process=random_process,
                 gamma=gamma, target_model_update=target_model_update)

agent.compile(Adam(lr=learning_rate, clipnorm=1.), metrics=['mae'])

In [13]:
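# Training the model (with visualization enabled; turn it off to speed up the learning process).
# NOTE: nb_max_episode_steps=episodes caps every episode at `episodes` (= 2) steps,
# which is why the log below regenerates a track roughly every two steps.
#agent.fit(env, nb_steps=time_steps, visualize=True, verbose=1, nb_max_episode_steps=episodes)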

Training for 10000 steps ...
Track generation: 958..1207 -> 249-tiles track
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 6:24 - reward: 7.9645Track generation: 985..1244 -> 259-tiles track
    4/10000 [..............................] - ETA: 11:18 - reward: 3.8541Track generation: 1241..1555 -> 314-tiles track
    6/10000 [..............................] - ETA: 12:32 - reward: 3.6010Track generation: 1200..1504 -> 304-tiles track
    7/10000 [..............................] - ETA: 13:44 - reward: 4.0153Track generation: 1174..1472 -> 298-tiles track
    9/10000 [..............................] - ETA: 13:43 - reward: 3.8490Track generation: 1172..1478 -> 306-tiles track
   11/10000 [..............................] - ETA: 13:45 - reward: 3.7271Track generation: 1200..1512 -> 312-tiles track
   13/10000 [..............................] - ETA: 14:02 - reward: 3.6330Track generation: 1367..1713 -> 346-tiles track
   16/10000 [............................

  124/10000 [..............................] - ETA: 24:16 - reward: 91.4223Track generation: 1152..1463 -> 311-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1088..1373 -> 285-tiles track
  126/10000 [..............................] - ETA: 24:24 - reward: 97.9061Track generation: 1042..1306 -> 264-tiles track
  128/10000 [..............................] - ETA: 24:28 - reward: 104.1872Track generation: 1356..1699 -> 343-tiles track
  130/10000 [..............................] - ETA: 24:40 - reward: 110.2751Track generation: 1227..1538 -> 311-tiles track
  132/10000 [..............................] - ETA: 24:46 - reward: 116.1785Track generation: 1145..1435 -> 290-tiles track
  134/10000 [..............................] - ETA: 24:51 - reward: 121.9057Track generation: 1167..1471 -> 304-tiles track
  136/10000 [..............................] - ETA: 24:56 - reward: 127.4645Track generation: 1089..1371 -> 282-tiles track
  138/

  232/10000 [..............................] - ETA: 28:07 - reward: 281.5757Track generation: 1212..1519 -> 307-tiles track
  234/10000 [..............................] - ETA: 28:07 - reward: 283.4417Track generation: 1251..1574 -> 323-tiles track
  236/10000 [..............................] - ETA: 28:08 - reward: 285.2761Track generation: 1155..1448 -> 293-tiles track
  238/10000 [..............................] - ETA: 28:09 - reward: 287.0797Track generation: 1228..1539 -> 311-tiles track
  240/10000 [..............................] - ETA: 28:11 - reward: 288.8532Track generation: 1123..1408 -> 285-tiles track
  242/10000 [..............................] - ETA: 28:11 - reward: 290.5974Track generation: 1100..1379 -> 279-tiles track
  244/10000 [..............................] - ETA: 28:10 - reward: 292.3130Track generation: 1058..1331 -> 273-tiles track
  246/10000 [..............................] - ETA: 28:09 - reward: 294.0007Track generation: 1123..1408 -> 285-tiles track
  248/10

  354/10000 [>.............................] - ETA: 28:36 - reward: 356.8174Track generation: 1182..1492 -> 310-tiles track
  356/10000 [>.............................] - ETA: 28:36 - reward: 357.6213Track generation: 951..1193 -> 242-tiles track
  358/10000 [>.............................] - ETA: 28:35 - reward: 358.4161Track generation: 1172..1469 -> 297-tiles track
  360/10000 [>.............................] - ETA: 28:35 - reward: 359.2021Track generation: 1011..1274 -> 263-tiles track
  362/10000 [>.............................] - ETA: 28:36 - reward: 359.9795Track generation: 859..1083 -> 224-tiles track
  364/10000 [>.............................] - ETA: 28:35 - reward: 360.7483Track generation: 1145..1435 -> 290-tiles track
  366/10000 [>.............................] - ETA: 28:34 - reward: 361.5086Track generation: 1189..1490 -> 301-tiles track
  368/10000 [>.............................] - ETA: 28:33 - reward: 362.2608Track generation: 999..1253 -> 254-tiles track
  370/10000

  478/10000 [>.............................] - ETA: 28:49 - reward: 393.9351Track generation: 1031..1298 -> 267-tiles track
  480/10000 [>.............................] - ETA: 28:47 - reward: 394.3766Track generation: 1134..1423 -> 289-tiles track
  482/10000 [>.............................] - ETA: 28:47 - reward: 394.8145Track generation: 1287..1613 -> 326-tiles track
  484/10000 [>.............................] - ETA: 28:47 - reward: 395.2487Track generation: 1131..1422 -> 291-tiles track
  486/10000 [>.............................] - ETA: 28:46 - reward: 395.6794Track generation: 1243..1558 -> 315-tiles track
  488/10000 [>.............................] - ETA: 28:46 - reward: 396.1065Track generation: 1131..1419 -> 288-tiles track
  490/10000 [>.............................] - ETA: 28:45 - reward: 396.5301Track generation: 1136..1424 -> 288-tiles track
  492/10000 [>.............................] - ETA: 28:45 - reward: 396.9503Track generation: 1104..1384 -> 280-tiles track
  494/10

  608/10000 [>.............................] - ETA: 28:44 - reward: 416.5920Track generation: 1311..1643 -> 332-tiles track
  610/10000 [>.............................] - ETA: 28:44 - reward: 416.8652Track generation: 966..1218 -> 252-tiles track
  612/10000 [>.............................] - ETA: 28:43 - reward: 417.1365Track generation: 1091..1375 -> 284-tiles track
  614/10000 [>.............................] - ETA: 28:42 - reward: 417.4061Track generation: 1334..1671 -> 337-tiles track
  616/10000 [>.............................] - ETA: 28:41 - reward: 417.6740Track generation: 1032..1294 -> 262-tiles track
  618/10000 [>.............................] - ETA: 28:40 - reward: 417.9401Track generation: 1087..1364 -> 277-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1225..1535 -> 310-tiles track
  620/10000 [>.............................] - ETA: 28:39 - reward: 418.2045Track generation: 1047..1313 -> 266-tiles track
  622

  734/10000 [=>............................] - ETA: 27:58 - reward: 430.8929Track generation: 1156..1449 -> 293-tiles track
  736/10000 [=>............................] - ETA: 27:57 - reward: 431.0804Track generation: 1156..1447 -> 291-tiles track
  738/10000 [=>............................] - ETA: 27:56 - reward: 431.2669Track generation: 1148..1439 -> 291-tiles track
  740/10000 [=>............................] - ETA: 27:55 - reward: 431.4524Track generation: 1059..1329 -> 270-tiles track
  742/10000 [=>............................] - ETA: 27:54 - reward: 431.6369Track generation: 984..1234 -> 250-tiles track
  744/10000 [=>............................] - ETA: 27:54 - reward: 431.8204Track generation: 1454..1822 -> 368-tiles track
  746/10000 [=>............................] - ETA: 27:53 - reward: 432.0029Track generation: 1236..1549 -> 313-tiles track
  748/10000 [=>............................] - ETA: 27:53 - reward: 432.1844Track generation: 1068..1339 -> 271-tiles track
  750/100

  982/10000 [=>............................] - ETA: 26:47 - reward: 448.3203Track generation: 1228..1539 -> 311-tiles track
  984/10000 [=>............................] - ETA: 26:47 - reward: 448.4252Track generation: 1044..1317 -> 273-tiles track
  986/10000 [=>............................] - ETA: 26:47 - reward: 448.5296Track generation: 1054..1331 -> 277-tiles track
  988/10000 [=>............................] - ETA: 26:46 - reward: 448.6336Track generation: 1225..1536 -> 311-tiles track
  990/10000 [=>............................] - ETA: 26:45 - reward: 448.7371Track generation: 975..1231 -> 256-tiles track
  992/10000 [=>............................] - ETA: 26:45 - reward: 448.8403Track generation: 1300..1629 -> 329-tiles track
  994/10000 [=>............................] - ETA: 26:44 - reward: 448.9430Track generation: 1044..1315 -> 271-tiles track
  996/10000 [=>............................] - ETA: 26:43 - reward: 449.0453Track generation: 1275..1598 -> 323-tiles track
  998/100

 1100/10000 [==>...........................] - ETA: 26:32 - reward: 453.8534Track generation: 1162..1463 -> 301-tiles track
 1102/10000 [==>...........................] - ETA: 26:32 - reward: 453.9370Track generation: 1124..1409 -> 285-tiles track
 1104/10000 [==>...........................] - ETA: 26:31 - reward: 454.0203Track generation: 1030..1292 -> 262-tiles track
 1106/10000 [==>...........................] - ETA: 26:30 - reward: 454.1032Track generation: 1064..1334 -> 270-tiles track
 1108/10000 [==>...........................] - ETA: 26:30 - reward: 454.1859Track generation: 1153..1445 -> 292-tiles track
 1110/10000 [==>...........................] - ETA: 26:29 - reward: 454.2683Track generation: 1125..1409 -> 284-tiles track
 1112/10000 [==>...........................] - ETA: 26:28 - reward: 454.3503Track generation: 1200..1510 -> 310-tiles track
 1114/10000 [==>...........................] - ETA: 26:28 - reward: 454.4321Track generation: 1111..1397 -> 286-tiles track
 1116/10

 1226/10000 [==>...........................] - ETA: 26:05 - reward: 458.5858Track generation: 1244..1559 -> 315-tiles track
 1228/10000 [==>...........................] - ETA: 26:04 - reward: 458.6531Track generation: 1127..1418 -> 291-tiles track
 1230/10000 [==>...........................] - ETA: 26:04 - reward: 458.7201Track generation: 1158..1460 -> 302-tiles track
 1232/10000 [==>...........................] - ETA: 26:04 - reward: 458.7870Track generation: 1124..1409 -> 285-tiles track
 1234/10000 [==>...........................] - ETA: 26:04 - reward: 458.8536Track generation: 1124..1409 -> 285-tiles track
 1236/10000 [==>...........................] - ETA: 26:03 - reward: 458.9200Track generation: 1015..1273 -> 258-tiles track
 1238/10000 [==>...........................] - ETA: 26:02 - reward: 458.9862Track generation: 1040..1304 -> 264-tiles track
 1240/10000 [==>...........................] - ETA: 26:02 - reward: 459.0522Track generation: 1207..1513 -> 306-tiles track
 1242/10

 1474/10000 [===>..........................] - ETA: 25:28 - reward: 465.5369Track generation: 1167..1463 -> 296-tiles track
 1476/10000 [===>..........................] - ETA: 25:28 - reward: 465.5834Track generation: 1160..1461 -> 301-tiles track
 1478/10000 [===>..........................] - ETA: 25:28 - reward: 465.6299Track generation: 1319..1653 -> 334-tiles track
 1480/10000 [===>..........................] - ETA: 25:27 - reward: 465.6762Track generation: 1143..1433 -> 290-tiles track
 1482/10000 [===>..........................] - ETA: 25:27 - reward: 465.7224Track generation: 1025..1295 -> 270-tiles track
 1484/10000 [===>..........................] - ETA: 25:26 - reward: 465.7684Track generation: 1124..1409 -> 285-tiles track
 1486/10000 [===>..........................] - ETA: 25:26 - reward: 465.8144Track generation: 1147..1438 -> 291-tiles track
 1488/10000 [===>..........................] - ETA: 25:26 - reward: 465.8602Track generation: 1231..1543 -> 312-tiles track
 1490/10

 1726/10000 [====>.........................] - ETA: 24:48 - reward: 470.5540Track generation: 1117..1406 -> 289-tiles track
 1728/10000 [====>.........................] - ETA: 24:47 - reward: 470.5879Track generation: 1141..1430 -> 289-tiles track
 1730/10000 [====>.........................] - ETA: 24:47 - reward: 470.6218Track generation: 1123..1408 -> 285-tiles track
 1732/10000 [====>.........................] - ETA: 24:46 - reward: 470.6556Track generation: 1204..1509 -> 305-tiles track
 1734/10000 [====>.........................] - ETA: 24:46 - reward: 470.6894Track generation: 1195..1498 -> 303-tiles track
 1736/10000 [====>.........................] - ETA: 24:46 - reward: 470.7230Track generation: 1083..1358 -> 275-tiles track
 1738/10000 [====>.........................] - ETA: 24:45 - reward: 470.7566Track generation: 952..1198 -> 246-tiles track
 1740/10000 [====>.........................] - ETA: 24:45 - reward: 470.7901Track generation: 1259..1578 -> 319-tiles track
 1742/100

 1846/10000 [====>.........................] - ETA: 24:26 - reward: 472.4616Track generation: 1055..1323 -> 268-tiles track
 1848/10000 [====>.........................] - ETA: 24:26 - reward: 472.4913Track generation: 1284..1609 -> 325-tiles track
 1850/10000 [====>.........................] - ETA: 24:25 - reward: 472.5210Track generation: 1095..1373 -> 278-tiles track
 1852/10000 [====>.........................] - ETA: 24:25 - reward: 472.5505Track generation: 1224..1534 -> 310-tiles track
 1854/10000 [====>.........................] - ETA: 24:24 - reward: 472.5800Track generation: 1152..1444 -> 292-tiles track
 1856/10000 [====>.........................] - ETA: 24:24 - reward: 472.6095Track generation: 1355..1706 -> 351-tiles track
 1858/10000 [====>.........................] - ETA: 24:24 - reward: 472.6388Track generation: 1168..1464 -> 296-tiles track
 1860/10000 [====>.........................] - ETA: 24:23 - reward: 472.6682Track generation: 1216..1519 -> 303-tiles track
 1862/10

 1970/10000 [====>.........................] - ETA: 24:04 - reward: 474.1887Track generation: 1195..1498 -> 303-tiles track
 1972/10000 [====>.........................] - ETA: 24:04 - reward: 474.2148Track generation: 1080..1359 -> 279-tiles track
 1974/10000 [====>.........................] - ETA: 24:03 - reward: 474.2408Track generation: 1100..1379 -> 279-tiles track
 1976/10000 [====>.........................] - ETA: 24:03 - reward: 474.2668Track generation: 1268..1589 -> 321-tiles track
 1978/10000 [====>.........................] - ETA: 24:03 - reward: 474.2927Track generation: 1243..1558 -> 315-tiles track
 1980/10000 [====>.........................] - ETA: 24:02 - reward: 474.3186Track generation: 1168..1464 -> 296-tiles track
 1982/10000 [====>.........................] - ETA: 24:02 - reward: 474.3444Track generation: 1319..1653 -> 334-tiles track
 1984/10000 [====>.........................] - ETA: 24:02 - reward: 474.3701Track generation: 1212..1519 -> 307-tiles track
 1986/10

 2092/10000 [=====>........................] - ETA: 23:43 - reward: 475.6881Track generation: 1045..1310 -> 265-tiles track
 2094/10000 [=====>........................] - ETA: 23:43 - reward: 475.7113Track generation: 1103..1383 -> 280-tiles track
 2096/10000 [=====>........................] - ETA: 23:42 - reward: 475.7343Track generation: 1196..1499 -> 303-tiles track
 2098/10000 [=====>........................] - ETA: 23:42 - reward: 475.7574Track generation: 1179..1478 -> 299-tiles track
 2100/10000 [=====>........................] - ETA: 23:42 - reward: 475.7804Track generation: 1051..1322 -> 271-tiles track
 2102/10000 [=====>........................] - ETA: 23:41 - reward: 475.8033Track generation: 1248..1564 -> 316-tiles track
 2104/10000 [=====>........................] - ETA: 23:41 - reward: 475.8262Track generation: 1365..1714 -> 349-tiles track
 2106/10000 [=====>........................] - ETA: 23:55 - reward: 475.8491Track generation: 1312..1644 -> 332-tiles track
 2108/10

retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1325..1660 -> 335-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1186..1487 -> 301-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1104..1392 -> 288-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 928..1171 -> 243-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1187..1488 -> 301-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1231..1543 -> 312-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1165..1460 -> 295-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1209..1515 -> 306-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1091..1368 -> 277-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1067..1342 -> 275-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1183..1483 -> 300-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1184..1484 -> 300-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1091..1370 -> 279-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1288..1614 -> 326-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1046..1316 -> 270-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1102..1388 -> 286-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1048..1326 -> 278-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1164..1459 -> 295-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1261..1580 -> 319-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1152..1444 -> 292-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1067..1338 -> 271-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1132..1419 -> 287-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1282..1607 -> 325-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 989..1240 -> 251-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1117..1404 -> 287-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1112..1394 -> 282-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1066..1337 -> 271-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1242..1557 -> 315-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1211..1518 -> 307-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1228..1539 -> 311-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1130..1416 -> 286-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1139..1428 -> 289-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1082..1363 -> 281-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1140..1433 -> 293-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 983..1239 -> 256-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1154..1447 -> 293-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1031..1293 -> 262-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1001..1257 -> 256-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1171..1477 -> 306-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1194..1497 -> 303-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1072..1

retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1028..1289 -> 261-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1127..1413 -> 286-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1329..1666 -> 337-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1119..1410 -> 291-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1340..1679 -> 339-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1192..1494 -> 302-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1192..1494 -> 302-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1214..

retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1124..1409 -> 285-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1031..1293 -> 262-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1019..1278 -> 259-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1272..1594 -> 322-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1116..1399 -> 283-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1057..1325 -> 268-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1149..1441 -> 292-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1096..1373 -> 277-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1112..1394 -> 282-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1071..1343 -> 272-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1196..1499 -> 303-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1209..1515 -> 306-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1093..1371 -> 278-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1088..1364 -> 276-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1097..1375 -> 278-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1138..1427 -> 289-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1147..1438 -> 291-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1110..1398 -> 288-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1244..1559 -> 315-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1300..1629 -> 329-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1147..1444 -> 297-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1300..1629 -> 329-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1083..1362 -> 279-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 992..1248 -> 256-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1339..1678 -> 339-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1192..1501 -> 309-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 977..1232 -> 255-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1095..1373 -> 278-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1096..1374 -> 278-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1132..1423 -> 291-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1300..1630 -> 330-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1082..1363 -> 281-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1200..1504 -> 304-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1168..1464 -> 296-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1176..1474 -> 298-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1217..1525 -> 308-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 964..1209 -> 245-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1110..1402 -> 292-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1239..1553 -> 314-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1286..1610 -> 324-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1088..1364 -> 276-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1119..1403 -> 284-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1127..1414 -> 287-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1021..1280 -> 259-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1159..1453 -> 294-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1240..1554 -> 314-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1136..1424 -> 288-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1088..1364 -> 276-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1223..1533 -> 310-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1079..

retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1219..1528 -> 309-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1010..1309 -> 299-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1073..1345 -> 272-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1054..1328 -> 274-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1116..1399 -> 283-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1144..1441 -> 297-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1125..1417 -> 292-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1287..1621 -> 334-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1237..1550 -> 313-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1064..1334 -> 270-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1335..1673 -> 338-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1120..1404 -> 284-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1201..1506 -> 305-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1182..1482 -> 300-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1139..1428 -> 289-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1135..1424 -> 289-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1317..1651 -> 334-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1187..1488 -> 301-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1044..1309 -> 265-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1063..1333 -> 270-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1175..1473 -> 298-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1099..1378 -> 279-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1096..1382 -> 286-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1132..1425 -> 293-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1267..1588 -> 321-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1195..1498 -> 303-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1216..1524 -> 308-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1103..1391 -> 288-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 975..1223 -> 248-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1128..1425 -> 297-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1164..1459 -> 295-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1244..1559 -> 315-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1195..1498 -> 303-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1163..1457 -> 294-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1113..1399 -> 286-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1035..1

retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1093..1371 -> 278-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1068..1339 -> 271-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1072..1344 -> 272-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1337..1675 -> 338-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1083..1358 -> 275-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1232..1544 -> 312-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1239..1553 -> 314-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1071..1351 -> 280-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1045..1314 -> 269-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1278..1608 -> 330-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1021..1280 -> 259-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1221..1530 -> 309-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1019..1288 -> 269-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1099..1378 -> 279-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1079..1353 -> 274-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1102..1382 -> 280-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1254..1577 -> 323-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1323..1658 -> 335-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1251..1568 -> 317-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1153..1454 -> 301-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1216..1524 -> 308-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1027..1288 -> 261-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1244..1560 -> 316-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1301..1630 -> 329-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1103..1383 -> 280-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1144..1434 -> 290-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1124..1409 -> 285-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1350..1692 -> 342-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1104..1384 -> 280-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1017..1278 -> 261-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1190..1491 -> 301-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 983..1233 -> 250-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1050..1316 -> 266-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1104..1392 -> 288-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1247..1563 -> 316-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1077..1350 -> 273-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1252..1569 -> 317-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1196..1499 -> 303-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1098..1382 -> 284-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1000..1256 -> 256-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1239..1553 -> 314-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1179..1478 -> 299-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1004..1271 -> 267-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1054..1331 -> 277-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1155..

retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1224..1534 -> 310-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1311..1643 -> 332-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1302..1632 -> 330-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1291..1618 -> 327-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 970..1217 -> 247-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1106..1386 -> 280-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1223..1533 -> 310-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1263..1

retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1092..1369 -> 277-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1144..1439 -> 295-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1056..1331 -> 275-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1101..1380 -> 279-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1204..1518 -> 314-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1159..1463 -> 304-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 976..1230 -> 254-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1105..1385 -> 280-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1312..1644 -> 332-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1148..1439 -> 291-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1111..1393 -> 282-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1181..1481 -> 300-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1092..1369 -> 277-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1140..1429 -> 289-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1293..1624 -> 331-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 972..1225 -> 253-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1161..1455 -> 294-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1168..1464 -> 296-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1396..1749 -> 353-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1146..1438 -> 292-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1068..1339 -> 271-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1209..1515 -> 306-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1216..1524 -> 308-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1159..1450 -> 291-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1192..1494 -> 302-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1152..1449 -> 297-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1153..1445 -> 292-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1075..1348 -> 273-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1093..1374 -> 281-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1192..1494 -> 302-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1107..1388 -> 281-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1255..1573 -> 318-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1059..1335 -> 276-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1015..1277 -> 262-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1283..1567 -> 284-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1084..



retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1112..1402 -> 290-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1116..1406 -> 290-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1092..1369 -> 277-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 996..1249 -> 253-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1013..1279 -> 266-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1098..1377 -> 279-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1295..1623 -> 328-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1189..1

retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1143..1432 -> 289-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1020..1279 -> 259-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1232..1544 -> 312-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1114..1397 -> 283-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1192..1494 -> 302-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1242..1557 -> 315-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1121..1409 -> 288-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1083..1365 -> 282-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1143..1433 -> 290-tiles track


retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1118..1410 -> 292-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1215..1523 -> 308-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1132..1423 -> 291-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1273..1595 -> 322-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1330..1666 -> 336-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1219..1528 -> 309-tiles track
done, took 2008.320 seconds


<tensorflow.python.keras.callbacks.History at 0x7fb8ce7f3370>

In [14]:
# Saving the weights.
# The call below is disabled; uncomment it to write the agent's current weights
# to disk. With overwrite=True an existing weights file of the same name is
# replaced, so presumably it was commented out to protect an already-saved
# file (TODO confirm).
# The formatted file name evaluates to 'cdqn_CarRacing-v0_weights.h5f', which is
# the name the weight-loading cell of this notebook reads.
#agent.save_weights('cdqn_{}_weights.h5f'.format('CarRacing-v0'), overwrite=True)

In [7]:
# Restore the agent's weights from a previously written weights file.
# NOTE(review): the saving cell in this notebook is commented out, so the
# weights must already exist on disk before this cell is run.
weights_file = 'cdqn_CarRacing-v0_weights.h5f'
agent.load_weights(weights_file)

In [8]:
# Evaluation run: 10 test episodes with rendering enabled and a per-episode
# limit of 200 steps. The test call stays the last expression of the cell so
# its return value is still shown as the cell output.
evaluation_options = dict(nb_episodes=10, visualize=True, nb_max_episode_steps=200)
agent.test(env, **evaluation_options)

Testing for 10 episodes ...
Track generation: 1055..1323 -> 268-tiles track




Episode 1: reward: 999.800, steps: 2
Track generation: 1047..1313 -> 266-tiles track
Episode 2: reward: 999.800, steps: 2
Track generation: 1103..1383 -> 280-tiles track
Episode 3: reward: 999.800, steps: 2
Track generation: 1141..1430 -> 289-tiles track
Episode 4: reward: 999.800, steps: 2
Track generation: 1088..1364 -> 276-tiles track
Episode 5: reward: 999.800, steps: 2
Track generation: 1284..1609 -> 325-tiles track
Episode 6: reward: 999.800, steps: 2
Track generation: 1115..1398 -> 283-tiles track
Episode 7: reward: 999.800, steps: 2
Track generation: 1234..1556 -> 322-tiles track
Episode 8: reward: 999.800, steps: 2
Track generation: 1197..1509 -> 312-tiles track
Episode 9: reward: 999.800, steps: 2
Track generation: 1250..1567 -> 317-tiles track
Episode 10: reward: 999.800, steps: 2


<tensorflow.python.keras.callbacks.History at 0x7fcd1eb56700>