In [1]:
import socket
import threading
from getData import get_data
from time import sleep

In [2]:
def data_getter_function():
    """Stream telemetry from the local game server into the global ``data``.

    Opens a TCP connection to 127.0.0.1:9000 and then loops forever,
    rebinding the module-level ``data`` to every value returned by
    ``get_data``. It never returns on its own, so it is meant to be run in a
    background thread.
    """
    global data
    server_address = ("127.0.0.1", 9000)
    telemetry_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with telemetry_socket:
        telemetry_socket.connect(server_address)
        while True:
            # Each reading replaces the previous one; consumers only ever
            # see the most recent value.
            data = get_data(telemetry_socket)


# Run the telemetry reader in a daemon thread so it never blocks kernel exit.
data_getter_thread = threading.Thread(target=data_getter_function, daemon=True)
data_getter_thread.start()
# Wait (bounded) until the reader thread has published a first value instead
# of assuming a fixed 0.1 s is always enough. Polling in 0.1 s slices keeps
# the common case exactly as fast as a single sleep(0.1).
for _attempt in range(50):  # at most ~5 s in total
    sleep(0.1)
    if 'data' in globals():
        break
else:
    print("Warning: no telemetry received from 127.0.0.1:9000 yet; "
          "getInfos() will fail until the game server sends data.")

In [3]:
from PIL import Image
from screenshot import screenshot
from getLidar import getMesuresDistances

In [4]:
def getInfos():
    """Collect one observation from the running game.

    Returns:
        tuple: ``(distances, speed, finish)`` where ``distances`` is the
        result of ``getMesuresDistances`` on a fresh screenshot, and
        ``speed`` / ``finish`` are the ``'speed'`` and ``'finish'`` entries
        of the latest telemetry value stored in the global ``data``.
    """
    # Bind the shared global once: the reader thread may rebind ``data`` at
    # any moment, so indexing ``data`` twice could mix fields that belong to
    # two different telemetry messages.
    telemetry = data
    speed = telemetry['speed']
    finish = telemetry['finish']
    # Capture the current frame and derive the distance measurements from it.
    frame = Image.fromarray(screenshot())
    distances = getMesuresDistances(frame)
    return distances, speed, finish

In [5]:
from gym import Env
from gym.spaces import Discrete, Box, Tuple
import numpy as np

from driver import controlKeySimple, reloadKey, releaseAllKeys

In [6]:
# Number of distance values in one observation (length of the state vector).
nb_lidar = 11
# Step budget of one episode: the environment ends the episode once this
# many steps have been taken without reaching the finish.
race_step = 800

In [8]:
class TMEnvDistances(Env):
    """Gym environment that drives the game from distance measurements.

    Each observation is the set of distances returned by ``getInfos``; each
    action is one of four discrete values forwarded to ``controlKeySimple``.
    An episode ends when the game reports ``finish`` or when the step budget
    (``race_step``) is used up.
    """

    def __init__(self):
        # Four discrete actions, passed unchanged to controlKeySimple.
        self.action_space = Discrete(4)
        # One distance per lidar ray, declared to lie in [0, 400].
        self.observation_space = Box(low=0, high=400, shape=(nb_lidar,))
        # Initial observation and last known speed.
        self.state = [0]*nb_lidar
        self.speed = 0
        # Remaining step budget for the current episode.
        self.race_length = race_step

    def step(self, action):
        """Apply ``action`` in the game and observe the outcome.

        Args:
            action: value from ``action_space`` handed to ``controlKeySimple``.

        Returns:
            tuple: ``(state, reward, done, info)``. ``reward`` is the speed
            gained since the previous step; on the final step it also
            includes 100 points per step of unused budget (which is zero
            when the budget has just run out). ``info`` is always empty.
        """
        # Send the chosen action to the game.
        controlKeySimple(action)
        # Read the resulting distances, speed and finish flag.
        n_distances, n_speed, n_finish = getInfos()
        # Consume one step of the episode budget.
        self.race_length -= 1
        # Base reward: change in speed since the previous step.
        reward = 0
        reward += n_speed - self.speed
        # Remember the new speed and observation for the next step.
        self.speed = n_speed
        self.state = n_distances
        # The episode ends on finish or when the step budget is exhausted.
        if self.race_length <= 0 or n_finish:
            done = True
            # Finishing early is rewarded in proportion to the steps left.
            reward += (self.race_length)*100
            # Restart the game right away (reset() reloads it again later).
            reloadKey()
        else:
            done = False

        # No auxiliary diagnostics are reported.
        info = {}

        return self.state, reward, done, info

    def render(self, mode='human'):
        """No-op: the game window itself is the visual representation.

        ``mode`` is accepted (and ignored) so the signature matches
        ``gym.Env.render`` and callers that pass ``mode='human'`` do not
        raise ``TypeError``.
        """
        pass

    def reset(self):
        """Restart the race and return the initial (all-zero) observation."""
        # Make sure no key is still held down, then reload the game.
        releaseAllKeys()
        reloadKey()
        # Reset observation and last known speed.
        self.state = [0]*nb_lidar
        self.speed = 0
        # Refill the step budget.
        self.race_length = race_step
        return self.state

In [9]:
# Instantiate the environment and read the two sizes the network needs:
# the observation shape and the number of discrete actions.
env = TMEnvDistances()
states, actions = env.observation_space.shape, env.action_space.n
print("States shape", states)
print("Actions shape", actions)

States shape (11,)
Actions shape 4


In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Reshape
from tensorflow.keras.optimizers import Adam

In [11]:
def build_model(states, actions):
    """Build the Q-network used by the DQN agent.

    Args:
        states: observation shape tuple; only ``states[0]`` (the number of
            distance values) is used.
        actions: number of discrete actions, i.e. the width of the output
            layer (one Q-value per action).

    Returns:
        An uncompiled ``Sequential`` model: Flatten -> Dense(24, relu) ->
        Dense(24, relu) -> Dense(actions, linear).
    """
    model = Sequential()
    # The leading 1 matches the window_length=1 used by the agent's replay
    # memory; Flatten collapses it so the Dense layers see a flat vector.
    model.add(Flatten(input_shape=(1,states[0])))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    # Q-values estimate unbounded returns (the environment's rewards are not
    # confined to [0, 1]), so the head must be linear; a sigmoid would clamp
    # every estimate into (0, 1).
    model.add(Dense(actions, activation='linear'))
    return model

In [12]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [13]:
def build_agent(model, actions):
    """Wrap ``model`` in a DQN agent.

    The agent uses a Boltzmann Q-policy, a sequential replay memory holding
    up to 50000 entries with a window length of 1, 100 warm-up steps and a
    target-model update setting of 0.1.

    Args:
        model: network producing one value per action.
        actions: number of discrete actions.

    Returns:
        The (not yet compiled) ``DQNAgent``.
    """
    exploration_policy = BoltzmannQPolicy()
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    return DQNAgent(
        model=model,
        memory=replay_memory,
        policy=exploration_policy,
        nb_actions=actions,
        nb_steps_warmup=100,
        target_model_update=1e-1,
    )

In [14]:
# Build the Q-network for this environment and print its layer summary.
model = build_model(states, actions)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 11)                0         
_________________________________________________________________
dense (Dense)                (None, 24)                288       
_________________________________________________________________
dense_1 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 100       
Total params: 988
Trainable params: 988
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Wrap the network in a DQN agent and train it against the live game.
dqn = build_agent(model, actions)
# `lr` is a deprecated alias that newer Keras releases warn about or reject;
# `learning_rate` is the supported argument name.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
# Short pause before the agent starts acting (presumably to give time to
# focus the game window -- confirm).
sleep(2)
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

Training for 10000 steps ...
Interval 1 (0 steps performed)
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
  654/10000 [>.............................] - ETA: 5:03 - reward: 0.0014done, took 21.359 seconds


<tensorflow.python.keras.callbacks.History at 0x20c8034dca0>

In [17]:
# Pause briefly, run five evaluation episodes, then report the mean of the
# per-episode rewards.
sleep(2)
scores = dqn.test(env, nb_episodes=5, visualize=False)
episode_rewards = scores.history['episode_reward']
print(np.mean(episode_rewards))

Testing for 5 episodes ...
Episode 1: reward: 1.192, steps: 800
Episode 2: reward: 0.478, steps: 800
Episode 3: reward: 2.555, steps: 800
Episode 4: reward: 0.341, steps: 800
Episode 5: reward: 0.618, steps: 800
1.036744884252548


In [20]:
# Persist the agent's current weights, replacing any existing file.
dqn.save_weights('w_distances.h5f', overwrite=True) 

In [26]:
# Drop the trained objects so the following cells rebuild them from scratch.
del model, dqn, env

In [27]:
# Recreate environment, network and agent from scratch so that previously
# saved weights can be loaded into a fresh agent.
env = TMEnvDistances()
actions = env.action_space.n
states = env.observation_space.shape
model = build_model(states, actions)
dqn = build_agent(model, actions)
# `lr` is a deprecated alias that newer Keras releases warn about or reject;
# `learning_rate` is the supported argument name.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])



In [28]:
# Restore the weights written earlier by dqn.save_weights into the new agent.
dqn.load_weights('w_distances.h5f')

In [30]:
# Pause briefly, run a single evaluation episode with the reloaded weights,
# then report its reward (mean over the one episode).
sleep(2)
scores = dqn.test(env, nb_episodes=1, visualize=False)
episode_rewards = scores.history['episode_reward']
print(np.mean(episode_rewards))

Testing for 1 episodes ...
Episode 1: reward: 1.220, steps: 800
1.2197112679481508
