In [1]:
import socket
import threading
from getData import get_data
from time import sleep

In [25]:
def data_getter_function():
    global data
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect(("127.0.0.1", 9000))
        while True:
            data = get_data(s)


data_getter_thread = threading.Thread(target=data_getter_function, daemon=True)
data_getter_thread.start()
sleep(0.1)

In [22]:
from PIL import Image
from screenshot import screenshot
from getLidar import getMesuresDistances

In [26]:
def getInfos():
    speed = data['speed']
    finish = data['finish']
    # screenshot
    frame = Image.fromarray(screenshot())
    # get distances
    distances = getMesuresDistances(frame)
    return distances, speed, finish

In [5]:
from gym import Env
from gym.spaces import Discrete, Box, Tuple
import numpy as np

from driver import controlKeySimple, reloadKey, releaseAllKeys

In [6]:
nb_lidar = 11
race_step = 800

In [7]:
class TMEnvDistances(Env):
    def __init__(self):
        # Actions we can take
        self.action_space = Discrete(4)
        # Speed [{0:1000}]
        self.observation_space = Box(low=0, high=400, shape=(nb_lidar,))
        # Set start speed
        self.state = [0]*nb_lidar
        self.speed = 0
        # Set race length
        self.race_length = race_step

    def step(self, action):
        # Make ingame action 
        controlKeySimple(action)
        # Get data from game
        n_distances, n_speed, n_finish = getInfos()
        # Reduce race length by 1 second
        self.race_length -= 1 
        # Calculate reward
        reward = 0
        reward += n_speed - self.speed
        # Save speed
        self.speed = n_speed
        self.state = n_distances
        # Check if race is done
        if self.race_length <= 0 or n_finish: 
            done = True
            reward += (self.race_length)*100
            # Restart game
            reloadKey()
        else:
            done = False
        
        # Set placeholder for info
        info = {}
        
        # Return step information
        return np.concatenate(( [self.speed],self.state)), reward, done, info

    def render(self):
        # The game is the visual representation
        # Maybe save lidar screenshot ?
        pass
    
    def reset(self):
        # Restart the game
        releaseAllKeys()
        reloadKey()
        # Reset speed
        self.state = [0]*nb_lidar
        self.speed = 0
        # Reset race time
        self.race_length = race_step
        return np.concatenate(( [self.speed],self.state))

In [13]:
env = TMEnvDistances()
states = (env.observation_space.shape[0]+1,)
actions = env.action_space.n
print("States shape",states)
print("Actions shape",actions)

States shape (12,)
Actions shape 4


In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Reshape
from tensorflow.keras.optimizers import Adam

In [15]:
def build_model(states, actions):
    model = Sequential()
    model.add(Flatten(input_shape=(1,states[0])))
    model.add(Dense(24, activation='relu', input_shape=states))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='sigmoid'))
    return model

In [16]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [17]:
def build_agent(model, actions):

    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=100, target_model_update=1e-1)
    return dqn

In [18]:
model = build_model(states, actions)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 12)                0         
_________________________________________________________________
dense (Dense)                (None, 24)                312       
_________________________________________________________________
dense_1 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 100       
Total params: 1,012
Trainable params: 1,012
Non-trainable params: 0
_________________________________________________________________


In [27]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
sleep(2)
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

releaseAllKeys()

Training for 50000 steps ...
Interval 1 (0 steps performed)
12 episodes - episode_reward: 4.859 [0.404, 11.545] - loss: 0.642 - mae: 0.697 - mean_q: 1.000

Interval 2 (10000 steps performed)
13 episodes - episode_reward: 2.435 [0.380, 5.340] - loss: 0.361 - mae: 0.897 - mean_q: 1.000

Interval 3 (20000 steps performed)
12 episodes - episode_reward: 2.863 [0.022, 5.721] - loss: 0.383 - mae: 0.905 - mean_q: 1.000

Interval 4 (30000 steps performed)
13 episodes - episode_reward: 3.763 [0.736, 11.214] - loss: 0.384 - mae: 0.911 - mean_q: 1.000

Interval 5 (40000 steps performed)
done, took 1510.815 seconds


In [29]:
releaseAllKeys()
sleep(2)
scores = dqn.test(env, nb_episodes=5, visualize=False)
print(np.mean(scores.history['episode_reward']))
releaseAllKeys()

Testing for 5 episodes ...
Episode 1: reward: 99.790, steps: 800
Episode 2: reward: 6300.065, steps: 738
Episode 3: reward: 99.921, steps: 800
Episode 4: reward: 99.793, steps: 800
Episode 5: reward: 99.921, steps: 800
1339.8981996154785


In [30]:
dqn.save_weights('w_speeddistances.h5f', overwrite=True) 

In [31]:
del model
del dqn
del env

In [35]:
env = TMEnvDistances()
actions = env.action_space.n
states = (env.observation_space.shape[0]+1,)
model = build_model(states, actions)
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

In [36]:
dqn.load_weights('w_speeddistances.h5f')

In [38]:
releaseAllKeys()
sleep(2)
scores = dqn.test(env, nb_episodes=1, visualize=False)
print(np.mean(scores.history['episode_reward']))
releaseAllKeys()

Testing for 1 episodes ...
Episode 1: reward: 100.090, steps: 800
100.08958969116212
