In [1]:
import json
import numpy as np
from timeit import default_timer

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
model = Sequential()
model.add(Flatten(input_shape=(1, 16)))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(3, activation='linear'))

In [3]:
from keras import losses

model.compile(Adam(lr=1e-3), metrics=['mae', 'accuracy'], loss=losses.mean_squared_error)

In [4]:
import json
import math
import os
from tqdm import tqdm_notebook

def norm_angle(theta):
    ''' Normalize an angle in radians to [0, 2*pi) '''
    angle = theta % (2*math.pi)
    if angle < 0:
        angle = 2*math.pi - angle
    return angle

prev_pos = (-1, -1)
prev_angle = -1


def get_observation(msg):
    global prev_pos, prev_angle
    observation = []
    for car in ['my_car', 'enemy_car']:
        data = msg[car]
        # print('angle ({}): {}'.format(car, data[1], norm_angle(data[1])))
        if data[2] == -1:
            for i in [0, 3, 4]:
                data[i] = (1200 - data[i][0], data[i][1])
        observation.extend([data[2], norm_angle(data[1]), data[3][0], data[3][1], data[4][0], data[4][1]])
    observation.append(10)#min(observation[3], observation[5]) - msg['params'].get('deadline_position', 0))
    if prev_pos[0] == -1:
        observation.extend([0.0, 0.0])
    else:
        observation.extend([(observation[2] + observation[4]) / 2 - prev_pos[0],
                            (observation[3] + observation[5]) / 2 - prev_pos[1]])
    prev_pos = ((observation[2] + observation[4]) / 2, (observation[3] + observation[5]) / 2)
    if prev_angle == -1:
        observation.append(0.0)
    else:
        observation.append(observation[1] - prev_angle)
    prev_angle = observation[1]

    return observation


def process(directory):
    global prev_pos, prev_angle
    # [0,"stop",[[300.0,300.0],0.0,1,[329.0,295.0,0.0],[422.0,295.0,0.0]]]
    states = []
    actions = []
    terminal = []
    reward = []
    for fname in tqdm_notebook(os.listdir(directory)):
        with open(directory + '/' + fname) as f:
            prev_pos = (-1, -1)
            prev_angle = -1
            raw_my = f.readline()
            raw_enemy = f.readline()
            try:
                raw_my = json.loads(raw_my)
            except:
                print('raw_my', raw_my)
                continue
            try:
                raw_enemy = json.loads(raw_enemy)
            except:
                print('raw_enemy', raw_enemy)
                continue
            if not raw_my['win']:
                print('Skipping {}...'.format(fname))
                continue
                #raw_my, raw_enemy = raw_enemy, raw_my
            obs_my, obs_enemy = raw_my['dump'], raw_enemy['dump']
            for step in range(len(obs_my)):
                actions.append(obs_my[step][1])
                states.append(get_observation({'my_car': obs_my[step][2], 'enemy_car': obs_enemy[step][2]}))
                if step < len(obs_my) - 1:
                    terminal.append(False)
                    reward.append(1)
                else:
                    terminal.append(True)
                    reward.append(10000)
    return states, actions, terminal, reward

In [5]:
states, actions, terminal, reward = process('dumps/PillMap_Bus')

Skipping dump_16217571.dat...
Skipping dump_2050090.dat...
Skipping dump_2934108.dat...
Skipping dump_39944268.dat...
Skipping dump_49490503.dat...
Skipping dump_52230593.dat...
Skipping dump_5817838.dat...
Skipping dump_79793109.dat...
Skipping dump_95221678.dat...



In [6]:
len(states)

19566

In [53]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_3 (Flatten)          (None, 16)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 32)                544       
_________________________________________________________________
activation_7 (Activation)    (None, 32)                0         
_________________________________________________________________
dense_9 (Dense)              (None, 32)                1056      
_________________________________________________________________
activation_8 (Activation)    (None, 32)                0         
_________________________________________________________________
dense_10 (Dense)             (None, 32)                1056      
_________________________________________________________________
activation_9 (Activation)    (None, 32)                0         
__________

In [7]:
from keras.utils import to_categorical

x_train = np.array(states)

atoi = {'left': 0, 'right': 1, 'stop': 2}
y_train = to_categorical([atoi[a] for a in actions])

In [8]:
x_train = x_train.reshape((x_train.shape[0], 1, x_train.shape[1]))

In [9]:
x_train

array([[[ 1.00000000e+00,  0.00000000e+00,  3.38000000e+02, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]],

       [[ 1.00000000e+00,  0.00000000e+00,  3.38000000e+02, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]],

       [[ 1.00000000e+00,  6.28066890e+00,  3.37923566e+02, ...,
         -7.63717844e-02, -1.88966282e-01,  6.28066890e+00]],

       ...,

       [[ 1.00000000e+00,  7.69124977e-01,  5.67737875e+02, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]],

       [[ 1.00000000e+00,  7.69124977e-01,  5.67737875e+02, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]],

       [[ 1.00000000e+00,  7.69124977e-01,  5.67737875e+02, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]]])

In [9]:
model.fit(x=x_train, y=y_train, batch_size=16, epochs=10)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x2e3795e10b8>

In [70]:
model.predict(x_train, batch_size=1)

array([[0.06426321, 0.22319226, 0.7107716 ],
       [0.06426321, 0.22319226, 0.7107716 ],
       [0.06426321, 0.22319226, 0.7107716 ],
       ...,
       [0.2116197 , 0.18295498, 0.6005717 ],
       [0.10605744, 0.21177989, 0.67951596],
       [0.06426321, 0.22319226, 0.7107716 ]], dtype=float32)

In [65]:
x_train[0][0]

array([  1.,   0., 329., 295., 422., 295.,  -1.,   0., 329., 295., 422.,
       295.,  10.,   0.,   0.,   0.])

In [10]:
model.save_weights('duel_dqn_101.h5f')