In [1]:
import glob
import os
import sys
import random
import time
import numpy as np
import cv2
import math
from collections import deque

import tensorflow as tf
import tensorflow.keras.backend as backend
from tensorflow.python.keras.backend import set_session
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard
from threading import Thread

from tqdm import tqdm

In [2]:
sys.path.append('E:\\projects\\carla\\WindowsNoEditor\\PythonAPI\\carla\\dist\\carla-0.9.9-py3.7-win-amd64.egg')

In [3]:
import carla

In [4]:
IM_WIDTH = 640
IM_HEIGHT = 480
SHOW_PREVIEW = False
SECONDS_PER_EPISODE = 10
REPLAY_MEMORY_SIZE = 5_000
MIN_REPLAY_MEMORY_SIZE = 1_000
MINIBATCH_SIZE = 4
PREDICTION_BATCH_SIZE = 1
TRAINING_BATCH_SIZE = 2
UPDATE_TARGET_EVERY = 5
MODEL_NAME = 'Xception'

MEMORY_FRACTION = 0.05
MIN_REWARD = -200

EPISODES = 100

DISCOUNT = 0.99
epsilon = 1
EPSILON_DECAY = 0.95
MIN_EPSILON = 0.001

In [5]:
# override log file creation per .fit call to just 1 log file for all .fit calls
class ModifiedTensorBoard(TensorBoard):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.step = 1
        self.writer = tf.summary.FileWriter(self.log_dir)
        
    def set_model(self, model):
        pass
    
    # Overrided, saves logs with our step number
    # (otherwise every .fit() will start writing from 0th step)
    def on_epoch_end(self, epoch, logs=None):
        self.update_stats(**logs)

    # Overrided
    # We train for one batch only, no need to save anything at epoch end
    def on_batch_end(self, batch, logs=None):
        pass

    # Overrided, so won't close writer
    def on_train_end(self, _):
        pass

    # Custom method for saving own metrics
    # Creates writer, writes custom metrics and closes writer
    def update_stats(self, **stats):
        self._write_logs(stats, self.step)

In [6]:
class CarEnv:
    SHOW_CAM = SHOW_PREVIEW
    STEER_AMT = 1.0
    im_width = IM_WIDTH
    im_height = IM_HEIGHT
    front_camera = None
    
    def __init__(self):
        self.client = carla.Client('localhost', 2000)
        self.world = self.client.get_world()
        self.blueprint_library = self.world.get_blueprint_library()
        self.car_model = self.blueprint_library.filter('model3')[0]
        self.collision_hist = []
        self.actor_list = []
        
        
    def reset(self):    
        self.transform = random.choice(self.world.get_map().get_spawn_points())
        self.vehicle = self.world.spawn_actor(self.car_model, self.transform)
        self.actor_list.append(self.vehicle)
        
        self.rgb_cam = self.blueprint_library.find('sensor.camera.rgb')
        self.rgb_cam.set_attribute('image_size_x', f'{self.im_width}')
        self.rgb_cam.set_attribute('image_size_y', f'{self.im_height}')
        self.rgb_cam.set_attribute('fov', f'110')
        
        transform = carla.Transform(carla.location(x=2.5, z=0.7)) # where to place sensor
        self.sensor = self.world.spawn_actor(self.rgb_cam, transform)
        
        self.actor_list.append(self.sensor)
        self.sensor.listen(lambda data: self.process_img(data))
        
        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0)) # dont move
        time.sleep(4)
        
        colsensor = self.blueprint_library.find('sensor.other.collision')
        self.colsensor = self.world.spawn_actor(colsensor, transform, attach_to=self.vehicle)
        self.colsensor.listen(lambda event: self.collision_data(event))
        
        self.actor_list.append(self.colsensor)
        
        while self.front_camera is None:
            time.sleep(0.01)
            
        self.episode_start = time.time()
        
        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0)) 
        
        return self.front_camera
    
    def collision_data(self, event):
        self.collision_hist.append(event) 
        
    def process_img(image):
        img_flat = np.array(image.raw_data)
        img = img_flat.reshape(IM_HEIGHT, IM_WIDTH, 4)
        img = img[:, :, :3] # ignores alpha values (images have 4 channels: rgba)
        return img / 255.0
        if self.SHOW_CAM:
            cv2.imshow('', img)
            cv2.waitKey(1)
        self.front_camera = img
        
    def step(self, action):
        if action == 0:
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=-1*self.STEER_AMT))
        elif action == 1:
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=0))
        elif action == 2:
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=self.STEER_AMT))
        
        v = self.vehicle.get_velocity()
        kmh = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2))
        
        if len(self.collision_hist) != 0:
            done = True
            reward = -200
        elif kmh < 50:
            done = False
            reward = -1
        else:
            done = False
            reward = 1
            
        if self.episode_start + SECONDS_PER_EPISODE < time.time():
            done = True
        
        return self.front_camera, reward, done, None

In [7]:
class DQNAgent:
    def __init__(self):
        self.sess = tf.Session()
        set_session(self.sess)
        self.model = self.create_model() # train
        self.target_model = self.create_model() # update periodically to self.model; used to keep results stable
        self.target_model.set_weights(self.model.get_weights())
        
        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
        
        self.tensorboard = ModifiedTensorBoard(log_dir=f'logs/{MODEL_NAME}-{int(time.time())}')
        self.target_update_counter = 0
        self.graph = tf.get_default_graph()
        
        self.terminate = False
        self.last_logged_episode = 0
        self.training_initialized = False
        
    def create_model(self):
        base_model = Xception(weights=None, include_top=False, input_shape=(IM_HEIGHT, IM_WIDTH, 3))
        
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        
        predictions = Dense(3, activation='linear')(x)
        model = Model(inputs=base_model.input, outputs=predictions)
        model.compile(loss='mse', optimizer=Adam(lr=0.001), metrics=['accuracy'])
        
        return model
    
    def update_replay_memory(self, transition):
        self.replay_memory.append(transition)
        
    def train(self):
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return
        
        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)
        
        current_states = np.array([transition[0] for transition in minibatch]) / 255.0
        with self.graph.as_default():
            current_qs_list = self.model.predict(current_states, PREDICTION_BATCH_SIZE)
            
        new_current_states = np.array([transition[3] for transition in minibatch]) / 255.0
        with self.graph.as_default():
            future_qs_list = self.target_model.predict(new_current_states, PREDICTION_BATCH_SIZE)
            
        X = []
        y = []
        
        for index, (current_state, action, reward, new_state, done) in enumerate(minibatch):
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward
                
            current_qs = current_qs_list[index]
            current_qs[action] = new_q
            
            X.append(current_state)
            y.append(current_qs)
        
        log_this_step = False
        if self.tensorboard.step > self.last_logged_episode:
            log_this_step = True
            self.last_log_episode = self.tensorboard.step
            
        with self.graph.as_default():
            self.model.fit(
                np.array(X) / 255., 
                np.array(y), 
                batch_size=TRAINING_BATCH_SIZE, 
                verbose=0, 
                shuffle=False, 
                callbacks=[self.tensorboard] if log_this_step else None
            )
            
        if log_this_step:
            self.target_update_counter += 1
        
        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0
            
    def get_qs(self, state):
        return self.model.predict(np.array(state).reshape(-1, *state.shape) / 255.)[0]
    
    def train_in_loop(self):
        # iterate through once to setup..
        X = np.random.uniform(size=(1, IM_HEIGHT, IM_WIDTH, 3)).astype(np.float32)
        y = np.random.uniform(size=(1, 3)).astype(np.float32)
        with self.graph.as_default(): # apparently useless statement but good practice to prevent overlapping graph values
            set_session(self.sess)
            self.model.fit(X, y, verbose=False, batch_size=1)
        
        self.training_initialized = True
        
        while True:
            if self.terminate:
                break
            self.train()
            time.sleep(0.01)
        

In [8]:
FPS = 20
ep_rewards = [-200]

random.seed(1)
np.random.seed(1)
tf.set_random_seed(1)

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=MEMORY_FRACTION)
backend.set_session(tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)))

if not os.path.isdir('models'):
    os.makedirs('models')

In [9]:
agent = DQNAgent()
env = CarEnv()

trainer_thread = Thread(target=agent.train_in_loop, daemon=True)
trainer_thread.start()

while not agent.training_initialized:
    time.sleep(0.01)

print('Agent training initialized')
agent.get_qs(np.ones((env.im_height, env.im_width, 3))) # mock image

for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'):
    print('Episode: {}'.format(episode))
    env.collision_hist = []
    agent.tensorboard.step = episode
    episode_reward = 0
    step = 1
    current_state = env.reset()
    done = False
    episode_start = time.time()
    
    while True:
        if np.random.random() > epsilon:
            action = np.argmax(agent.get_qs(current_state))
        else:
            action = np.random.randint(0, 3)
            time.sleep(1 / FPS)
            
        new_state, reward, done, _ = env.step(action)
        
        episode_reward += reward
        agent.update_replay_memory((current_state, action, reward, new_state, done))
        
        # here we would add the train method  but since we are doing in parallel, another thread runs train_in_loop method
        # idea is while running this loop, we somehow reach done; this may or may not actually initiate training
        # eventually training will begin and we will continue adding new samples to training data
        # the other thread continuously runs and will therefore continuously sample new training data for traning
        
        step += 1
        
        if done:
            break
            
    for actor in env.actor_list:
        actor.destroy()
        env.actor_list = []
        
    ep_rewards.append(episode_reward)
    if not episode % AGGREGATE_STATS_EVERY or episode == 1:
        average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
        min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
        max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
        agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward)
        
        if min_reward >= MIN_REWARD:
            agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg__{min_reward:_>7.2f}min.h5')
        
    
    if epsilon > MIN_EPSILON:
        epsilon = max(MIN_EPSILON, EPSILON_DECAY * epsilon)

        
agent.terminate = True
trainer_thread.join()
agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg__{min_reward:_>7.2f}min.h5')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


ResourceExhaustedError: OOM when allocating tensor with shape[3,3,728,1] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node block6_sepconv2_1/depthwise_kernel/Initializer/random_uniform/RandomUniform (defined at C:\Users\GZhang\Anaconda3\lib\site-packages\keras_applications\xception.py:219) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'block6_sepconv2_1/depthwise_kernel/Initializer/random_uniform/RandomUniform', defined at:
  File "C:\Users\GZhang\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\GZhang\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\traitlets\config\application.py", line 664, in launch_instance
    app.start()
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 563, in start
    self.io_loop.start()
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\GZhang\Anaconda3\lib\asyncio\base_events.py", line 534, in run_forever
    self._run_once()
  File "C:\Users\GZhang\Anaconda3\lib\asyncio\base_events.py", line 1771, in _run_once
    handle._run()
  File "C:\Users\GZhang\Anaconda3\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tornado\ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tornado\ioloop.py", line 743, in _run_callback
    ret = callback()
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tornado\gen.py", line 787, in inner
    self.run()
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tornado\gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 378, in dispatch_queue
    yield self.process_one()
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tornado\gen.py", line 225, in wrapper
    runner = Runner(result, future, yielded)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tornado\gen.py", line 714, in __init__
    self.run()
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tornado\gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 272, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 542, in execute_request
    user_expressions, allow_stdin,
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2855, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in _run_cell
    return runner(coro)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3058, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3249, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-1f3a178c34ca>", line 1, in <module>
    agent = DQNAgent()
  File "<ipython-input-7-d6f3bf2ee9fc>", line 6, in __init__
    self.target_model = self.create_model() # update periodically to self.model; used to keep results stable
  File "<ipython-input-7-d6f3bf2ee9fc>", line 20, in create_model
    base_model = Xception(weights=None, include_top=False, input_shape=(IM_HEIGHT, IM_WIDTH, 3))
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\keras\applications\__init__.py", line 70, in wrapper
    return base_fun(*args, **kwargs)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\keras\applications\xception.py", line 32, in Xception
    return xception.Xception(*args, **kwargs)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\keras_applications\xception.py", line 219, in Xception
    name=prefix + '_sepconv2')(x)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 538, in __call__
    self._maybe_build(inputs)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1603, in _maybe_build
    self.build(input_shapes)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\keras\layers\convolutional.py", line 1334, in build
    dtype=self.dtype)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 349, in add_weight
    aggregation=aggregation)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\training\checkpointable\base.py", line 607, in _add_variable_with_custom_getter
    **kwargs_for_getter)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer_utils.py", line 145, in make_variable
    aggregation=aggregation)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\ops\variables.py", line 213, in __call__
    return cls._variable_v1_call(*args, **kwargs)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\ops\variables.py", line 176, in _variable_v1_call
    aggregation=aggregation)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\ops\variables.py", line 155, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 2488, in default_variable_creator
    import_scope=import_scope)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\ops\variables.py", line 217, in __call__
    return super(VariableMetaclass, cls).__call__(*args, **kwargs)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 294, in __init__
    constraint=constraint)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 406, in _init_from_args
    initial_value() if init_from_fn else initial_value,
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer_utils.py", line 127, in <lambda>
    shape, dtype=dtype, partition_info=partition_info)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\ops\init_ops.py", line 512, in __call__
    shape, -limit, limit, dtype, seed=self.seed)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\ops\random_ops.py", line 247, in random_uniform
    rnd = gen_random_ops.random_uniform(shape, dtype, seed=seed1, seed2=seed2)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_random_ops.py", line 777, in random_uniform
    name=name)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3300, in create_op
    op_def=op_def)
  File "C:\Users\GZhang\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[3,3,728,1] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node block6_sepconv2_1/depthwise_kernel/Initializer/random_uniform/RandomUniform (defined at C:\Users\GZhang\Anaconda3\lib\site-packages\keras_applications\xception.py:219) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.



In [None]:
# actor_list = []

# try:
#     # set up client
#     client = carla.Client('localhost', 2000)
#     client.set_timeout(2.0)
    
#     # set up vehicle
#     world = client.get_world()
#     blueprint_library = world.get_blueprint_library()
#     bp = blueprint_library.filter('model3')[0]
#     print(bp)
    
#     spawn_point = random.choice(world.get_map().get_spawn_points())
    
#     vehicle = world.spawn_actor(bp, spawn_point)
# #     vehicle.set_autopilot(True)

#     vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=0.0))
#     actor_list.append(vehicle)
    
#     # set up camera sensor
#     cam_bp = blueprint_library.find('sensor.camera.rgb')
#     cam_bp.set_attribute('image_size_x', f'{IM_WIDTH}')
#     cam_bp.set_attribute('image_size_y', f'{IM_HEIGHT}')
#     cam_bp.set_attribute('fov', '110')
    
#     # place camera onto car
#     spawn_point = carla.Transform(carla.Location(x=2.5, z=0.7))
#     sensor = world.spawn_actor(cam_bp, spawn_point, attach_to=vehicle)
#     actor_list.append(sensor)
#     sensor.listen(lambda data: process_img(data))
# finally:
#     pass

# # clean up
# for actor in actor_list:
#     actor.destroy()
# print('All cleaned up!')