-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.py
346 lines (269 loc) · 11.7 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
from __future__ import print_function
import glob
import logging
import os
import platform
import subprocess
import sys
import threading
import time
import traceback
from threading import Thread
import numpy as np
import psutil
import tensorflow as tf
# ==============================================================================
# -- Find CARLA module ---------------------------------------------------------
# ==============================================================================
try:
sys.path.append(glob.glob('../carla/dist/carla-*%d.%d-%s.egg' % (
sys.version_info.major,
sys.version_info.minor,
'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0])
except IndexError:
pass
# ==============================================================================
# -- Add PythonAPI for release mode --------------------------------------------
# ==============================================================================
try:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + '/carla')
except IndexError:
pass
import carla
from collections import deque
from ModifiedTensorboard import ModifiedTensorBoard
from RL_Agent import DQNAgent, MODEL_NAME, REPLAY_MEMORY_SIZE
from CheckpointManager import CheckpointManager
from Simulator import CarEnvironment
from Simulator import WIDTH, HEIGHT
# suppress TensorFlow INFO and WARNING messages on the C++ side
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# ==============================================================================
# -- Defining Constants --------------------------------------------------------
# ==============================================================================
# connection parameters of the local CARLA simulator server
PORT = 2000
HOST = "127.0.0.1"
# simulator executable name depends on the operating system
EXECUTABLE = "CarlaUE4.exe" if platform.system() == "Windows" else "CarlaUE4.sh"
# NOTE(review): MEMORY_FRACTION and MIN_REWARD are not referenced in this file;
# presumably consumed elsewhere (e.g. by the agent) -- confirm before removing
MEMORY_FRACTION = 0.4
MIN_REWARD = 4
# mutable learning state: episode counter and exploration rate (epsilon-greedy)
episode = 0
epsilon = 1
EPSILON_DECAY = 0.985  # multiplicative epsilon decay applied once per episode
MIN_EPSILON = 0.001  # floor for the decayed exploration rate
MIN_EPSILON_2 = 0.1  # below this, a collapsed policy triggers an epsilon reset
AGGREGATE_STATS_EVERY = 10  # log aggregated reward stats every N episodes
SAVE_MODEL_EVERY = 1000  # save the model every N episodes
# model which should be loaded. None to create a new model
load_model_name = None
FPS = 60  # assumed simulator frame rate, used to pace non-inference actions
# ==============================================================================
# -- Learning Algorithm -------------------------------------------------------
# ==============================================================================
def start_carla() -> carla.World:
    """
    Start the CARLA simulator and connect to it.

    Kills any already-running CARLA processes, launches a fresh simulator,
    then retries connecting until the server answers. Loads the correct map
    and unloads map layers which might cause problems (foliage and props).

    :return: the world object of the connected CARLA simulator
    """
    while True:
        log_info("Carla Start")
        kill_processes()
        # Pass the command as an argument list so it works without a shell on
        # both Windows and POSIX (a single string with embedded arguments is
        # treated as the literal program name on POSIX and fails to launch).
        subprocess.Popen(
            [f"../../{EXECUTABLE}", "-quality-level=Low", "-ResX=300", "-ResY=200"])
        # give the simulator some time to boot before connecting
        time.sleep(6)
        try:
            client = carla.Client(HOST, PORT)
            client.set_timeout(5.0)
            if client.get_world().get_map().name != "Town02_Opt":
                client.load_world("Town02_Opt")
                time.sleep(1)
            # foliage and props may cause problems; drop them from the map
            for layer in (carla.MapLayer.Foliage, carla.MapLayer.Props):
                client.get_world().unload_map_layer(layer)
                time.sleep(1)
            return client.get_world()
        except RuntimeError:
            # server not reachable yet -- loop restarts CARLA and tries again
            time.sleep(0.1)
def kill_processes():
    """
    Stop all running CARLA simulator processes so a fresh one can be started.

    First sends a terminate signal to every matching process, then force-kills
    any that are still alive and waits for them to actually exit.
    """
    # executable name without the file extension, e.g. "carlaue4";
    # hoisted out of the loops instead of being recomputed per process
    exe_prefix = EXECUTABLE.split('.')[0].lower()

    def _carla_processes():
        # one place for the matching logic used by both passes
        return [p for p in psutil.process_iter()
                if p.name().lower().startswith(exe_prefix)]

    for process in _carla_processes():
        try:
            # best effort: the process may already have exited
            process.terminate()
        except Exception:
            pass
    # kill whatever survived the terminate and wait until it is gone
    still_alive = _carla_processes()
    for process in still_alive:
        try:
            process.kill()
        except Exception:
            pass
    if still_alive:
        psutil.wait_procs(still_alive)
def learn_loop(sim_world, tensorboard, replay_memory):
    """
    Start the learning process by creating all needed components and then continuously executing episodes.

    A given model will be loaded if 'load_model_name' is specified. An agent is
    created with the model and a daemon thread is started which constantly
    executes the agent's training, while this thread keeps running episodes.
    When an error occurs, the current model is saved, so it can be loaded again.

    :param sim_world: the carla.World object returned by start_carla()
    :param tensorboard: shared ModifiedTensorBoard instance used for stats logging
    :param replay_memory: deque shared with the agent as experience replay buffer
    """
    global load_model_name
    car_environment = None
    agent = None
    trainer_thread = None
    # seed with a low reward so the stats aggregation never sees an empty list
    episode_rewards = [-20]
    # sliding window of recent greedy actions, used to detect a collapsed policy
    actions = deque([1], maxlen=200)
    try:
        # create car environment in the simulator and the Reinforcement Learning agent
        checkpoint_manager = CheckpointManager()
        car_environment = CarEnvironment(sim_world, checkpoint_manager)
        agent = DQNAgent(load_model_name, tensorboard, replay_memory)
        # start training thread and wait for training to be initialized
        trainer_thread = Thread(target=agent.train_in_loop, daemon=True)
        trainer_thread.start()
        while not agent.training_initialized:
            time.sleep(0.01)
        # warm up a fresh model with a dummy prediction (the first call is slow)
        if load_model_name is None:
            start_state = np.ones((1, HEIGHT, WIDTH, 1)) * 255, 1, 1, 1
            agent.get_qs(start_state)
        # execute episodes until the program is stopped
        while True:
            execute_episode(agent, car_environment, actions, episode_rewards)
    except (RuntimeError, KeyboardInterrupt):
        # only save if the agent was actually created -- saving with agent=None
        # would raise an AttributeError and mask the original error
        if agent is not None:
            avg_reward, min_reward, max_reward = calculate_rewards(episode_rewards, False)
            save_model(
                f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{avg_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model',
                agent)
            log_info("Save except")
    finally:
        # Set termination flag for training thread and wait for it to finish
        if agent is not None:
            agent.terminate = True
        if trainer_thread is not None:
            trainer_thread.join()
        if car_environment is not None:
            car_environment.destroy()
def execute_episode(agent: DQNAgent, car_environment: CarEnvironment, actions: deque, episode_rewards: list):
    """
    Execute a full episode and do all administrative tasks regarding this episode.

    Runs the driving actions, records the reward, logs aggregated stats,
    decays (and possibly resets) the exploration rate and periodically saves
    the model.

    :param agent: the agent doing the learning
    :param car_environment: the car environment containing the car and all sensors
    :param actions: sliding window of recent greedy actions across episodes
    :param episode_rewards: list collecting the total reward of every episode
    :raises RuntimeError: if the training thread stopped running
    """
    global episode, epsilon
    episode += 1
    agent.tensorboard.step = episode
    # driving actions this episode
    episode_actions, episode_reward = execute_episode_actions(agent, car_environment)
    actions.extend(episode_actions)
    # append episode reward to a list and log stats every given number of episodes
    episode_rewards.append(episode_reward)
    if episode % AGGREGATE_STATS_EVERY == 0 or episode == 1:
        avg_reward, min_reward, max_reward = calculate_rewards(episode_rewards, False)
        agent.tensorboard.update_stats(reward_avg=avg_reward, reward_min=min_reward, reward_max=max_reward,
                                       epsilon=epsilon)
    # decay epsilon each iteration, never dropping below MIN_EPSILON
    if epsilon > MIN_EPSILON:
        epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)
    # reset epsilon when exploration is nearly over but the recent greedy
    # actions have collapsed to a single value (degenerate policy)
    if epsilon < MIN_EPSILON_2 and len(set(actions)) == 1:
        epsilon = 0.5
    print(f"{episode_reward} :Reward | Epsilon: {epsilon}")
    if episode % SAVE_MODEL_EVERY == 0:
        avg_reward, min_reward, max_reward = calculate_rewards(episode_rewards, True)
        save_model(
            f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{avg_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model',
            agent)
        log_info("Save episodes")
    # the trainer runs as a second thread; fewer than 2 threads means it died
    if threading.active_count() < 2:
        raise RuntimeError("A Thread stopped running")
def execute_episode_actions(agent: DQNAgent, car_environment: CarEnvironment) -> tuple:
    """
    Execute a full episode.
    First reset the world in the simulator and reset the episode time.
    Continuously take driving actions until the time runs out or the car gets into a collision.
    How an action is chosen is based on the epsilon value.
    If a generated value is lower than the epsilon value, a random action will be selected, but if the generated value
    is higher, the action with the best q values for the current state will be selected.
    :param agent: the agent doing the learning
    :param car_environment: the car environment containing the car and all sensors
    :return: tuple containing a deque of the greedy actions taken this episode and the summed reward in this episode
    """
    # reset environment and get initial state
    current_state = car_environment.restart()
    episode_actions = deque(maxlen=200)
    episode_reward = 0
    # the car starts standing still; rewards only count once it has moved
    standing = True
    car_environment.episode_start = time.time()
    car_environment.extra_time = 0
    while True:
        # current_state[3] is presumably the car's speed -- TODO confirm against CarEnvironment
        if standing and current_state[3] != 0:
            standing = False
        # if the car is standing still go forwards
        if current_state[3] == 0:
            action = 1
            # sleep to keep the action rate comparable to a model inference step
            time.sleep(12 / FPS)
        elif np.random.random() > epsilon:
            # exploit: pick the action with the highest predicted q value;
            # only these greedy actions are recorded for policy-collapse detection
            qs = agent.get_qs(current_state)
            action = np.argmax(qs)
            episode_actions.append(action)
        else:
            # explore: random action (presumably 9 discrete actions, 0-8 -- confirm)
            action = np.random.randint(0, 9)
            time.sleep(12 / FPS)
        # execute the action in the environment
        new_state, reward, done, _ = car_environment.step(action)
        if not standing:
            episode_reward += reward
        agent.update_replay_memory((current_state, action, reward, new_state, done))
        current_state = new_state
        if done:
            break
    # clean up the simulator by destroying the car and the sensors
    car_environment.destroy()
    return episode_actions, episode_reward
def save_model(model_name, agent):
    """
    Persist the agent's model to disk under *model_name* and remember that
    path in the module-global 'load_model_name' so a restart reloads it.

    :param model_name: target file path for the saved model
    :param agent: the DQNAgent whose underlying model gets saved
    """
    global load_model_name
    print("save ", model_name)
    agent.model.save(model_name)
    load_model_name = model_name
def log_info(message: str):
    """Append *message* to log.txt, prefixed with the current hour and minute."""
    timestamp = time.strftime("%H %M")
    with open("log.txt", "a") as log_file:
        log_file.write(f"{timestamp}{message}\n")
def calculate_rewards(episode_rewards, save: bool) -> tuple:
    """
    Aggregate the most recent episode rewards.

    :param episode_rewards: list of per-episode rewards, newest last
    :param save: True -> look back SAVE_MODEL_EVERY episodes,
                 False -> look back AGGREGATE_STATS_EVERY episodes
    :return: (average, minimum, maximum) of the considered rewards
    """
    lookback = SAVE_MODEL_EVERY if save else AGGREGATE_STATS_EVERY
    window = episode_rewards[-lookback:]
    return np.mean(window), min(window), max(window)
# ==============================================================================
# -- main() --------------------------------------------------------------
# ==============================================================================
def main():
    """
    Entry point: keep restarting the simulator and the learning process
    whenever the simulator crashes, until the user interrupts with Ctrl+C.
    """
    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
    logging.info('listening to server %s:%s', HOST, PORT)
    print("Number of GPUs available: ", len(tf.config.list_physical_devices('GPU')))
    # create models folder to save the progress in
    if not os.path.isdir('models'):
        os.makedirs('models')
    # tensorboard and replay memory are shared across simulator restarts
    tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}")
    replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
    # start the learning process; any crash restarts CARLA and continues
    try:
        while True:
            try:
                learn_loop(start_carla(), tensorboard, replay_memory)
            except RuntimeError as e:
                print("runtime error", e)
                print(traceback.format_exc())
                time.sleep(0.1)
            except Exception as e:
                print("main exception", e)
                print(traceback.format_exc())
                time.sleep(0.1)
    except KeyboardInterrupt:
        print('\nCancelled by user.')


if __name__ == '__main__':
    main()