In [None]:
import time
import random
import sys
import os
import glob
import logging
import numpy as np
import cv2

In [None]:
# Glob pattern for the CARLA client egg. The three placeholders are filled with the
# interpreter's major version, minor version and the platform tag.
# The default is the original machine-specific install location; set the CARLA_EGG_PATTERN
# environment variable (it must keep the same %d.%d-%s placeholders) to point elsewhere
# without editing the notebook.
CARLA_PATH = os.environ.get(
    'CARLA_EGG_PATTERN',
    'C:/Users/saile/Desktop/Sailesh/Carla Simulator/CARLA_0.9.10/WindowsNoEditor/PythonAPI/carla/dist/carla-*%d.%d-%s.egg',
)

_carla_egg_pattern = CARLA_PATH % (
    sys.version_info.major,
    sys.version_info.minor,
    'win-amd64' if os.name == 'nt' else 'linux-x86_64')
_carla_eggs = glob.glob(_carla_egg_pattern)
if _carla_eggs:
    sys.path.append(_carla_eggs[0])
else:
    # Still best-effort (carla may be installed as a regular package), but say so instead of
    # silently continuing: otherwise a wrong path only shows up as an ImportError below.
    logging.warning("No CARLA egg matched %r; falling back to an installed 'carla' package.",
                    _carla_egg_pattern)

import carla
from carla import VehicleLightState as vls

In [None]:
# --- Simulation configuration -------------------------------------------------
# Camera frame geometry in pixels.
IMAGE_WIDTH, IMAGE_HEIGHT = 500, 330

# Ego vehicle blueprint is looked up as "vehicle.<VEHICLE_NAME>.<MODEL_NAME>".
# NOTE(review): MODEL_NAME is reassigned by the hyperparameter cell further down, so the
# vehicle cell must run before that cell (or after re-running this one).
VEHICLE_NAME, MODEL_NAME = 'audi', 'a2'

# Magnitude of the steering command used for the left/right actions.
STEER_AMOUNT = 1

# Preview toggle; not read by any code visible in this notebook.
SHOW_PREVIEW = False

In [None]:
# Shared mutable state: spawned actors (kept so they can be destroyed later) and the
# collision events that step() inspects to end an episode.
actor_list, collision_hist = [], []

In [None]:
# Connect to the CARLA server on localhost:2000 and cache the handles the later cells read:
# the world, its map, the blueprint library and the map's spawn points.
client = carla.Client('localhost', 2000)
world = client.get_world()
# NOTE(review): `map` shadows Python's built-in map() for the rest of the notebook. Later
# cells read this name, so renaming it needs a coordinated change.
map = world.get_map()
blueprint_library = world.get_blueprint_library()
spawn_points = map.get_spawn_points()
print(map)

In [None]:
# Look up the ego-vehicle blueprint. An empty result usually means VEHICLE_NAME/MODEL_NAME
# do not name a vehicle (MODEL_NAME is reassigned by the hyperparameter cell further down,
# so re-running this cell afterwards would search for the wrong blueprint).
vehicle_blueprints = blueprint_library.filter(f"vehicle.{VEHICLE_NAME}.{MODEL_NAME}")
if len(vehicle_blueprints) == 0:
    raise RuntimeError(f"No blueprint matches 'vehicle.{VEHICLE_NAME}.{MODEL_NAME}'")
vehicle_blueprint = vehicle_blueprints[0]

# try_spawn_actor returns None (instead of raising) when the spawn point is blocked, so walk
# the spawn points in random order until one succeeds rather than gambling on a single pick.
vehicle = None
_candidate_transforms = list(map.get_spawn_points())
random.shuffle(_candidate_transforms)
for vehicle_spawn_location in _candidate_transforms:
    vehicle = world.try_spawn_actor(vehicle_blueprint, vehicle_spawn_location)
    if vehicle is not None:
        break
if vehicle is None:
    raise RuntimeError("Could not spawn the vehicle: every spawn point is occupied or the map has none")

vehicle.set_autopilot()
actor_list.append(vehicle)

In [None]:
# RGB camera: frame size comes from the shared constants so it cannot drift from the size the
# network and the preview expect. Blueprint attributes are set as strings.
rgb_camera_bp = blueprint_library.find('sensor.camera.rgb')
rgb_camera_bp.set_attribute("image_size_x", str(IMAGE_WIDTH))
rgb_camera_bp.set_attribute("image_size_y", str(IMAGE_HEIGHT))
rgb_camera_bp.set_attribute("fov", "110")

# Depth camera: same frame size, slightly wider field of view (kept from the original setup).
depth_sensor_bp = blueprint_library.find('sensor.camera.depth')
depth_sensor_bp.set_attribute("image_size_x", str(IMAGE_WIDTH))
depth_sensor_bp.set_attribute("image_size_y", str(IMAGE_HEIGHT))
depth_sensor_bp.set_attribute("fov", "120")

# Both sensors use the same offset (x=3, z=2) relative to the vehicle they are attached to.
rgb_camera_transform = carla.Transform(carla.Location(x=3, z=2))
depth_sensor_transform = carla.Transform(carla.Location(x=3, z=2))
rgb_camera = world.spawn_actor(rgb_camera_bp, rgb_camera_transform, attach_to=vehicle)
depth_sensor = world.spawn_actor(depth_sensor_bp, depth_sensor_transform, attach_to=vehicle)

# Track the sensors next to the vehicle so cleanup code iterating actor_list destroys them too.
actor_list.extend([rgb_camera, depth_sensor])

In [None]:
def show_image(image):
    """Display one camera frame in an OpenCV window.

    Args:
        image: a carla.Image delivered by the camera's listen() callback. Its raw_data is
            read as 4 bytes per pixel; the fourth channel is dropped before display.
    """
    # Take the frame size from the image itself so the preview keeps working if the camera
    # blueprint resolution changes (the original hard-coded 330x500).
    frame = np.frombuffer(image.raw_data, dtype=np.uint8)
    frame = frame.reshape((image.height, image.width, 4))
    frame = frame[:, :, :3]
    cv2.imshow("car image", frame)
    # waitKey gives the OpenCV GUI a chance to actually draw the window.
    cv2.waitKey(1)


rgb_camera.listen(show_image)

In [None]:
def step(action):
    """Apply one discrete driving action and score the resulting state.

    Args:
        action: 0 = full throttle steering left, 1 = full throttle straight,
            2 = full throttle steering right. Any other value applies no control.

    Returns:
        (reward, done): reward is -200 after a collision, -1 below 50 km/h and 1 otherwise;
        done is True after a collision or once the episode has run for more than 100 seconds.

    Reads the module-level `vehicle`, `collision_hist`, `STEER_AMOUNT` and `episode_start`
    (the latter is set by the training loop at the start of every episode).
    """
    # Local import: the notebook's import cell does not import math, which made the speed
    # computation below raise NameError.
    import math

    steer_by_action = {0: -1 * STEER_AMOUNT, 1: 0, 2: 1 * STEER_AMOUNT}
    if action in steer_by_action:
        vehicle.apply_control(
            carla.VehicleControl(throttle=1.0, steer=steer_by_action[action]))

    # Velocity components are treated as m/s; 3.6 converts the magnitude to km/h.
    v = vehicle.get_velocity()
    kmh = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2))

    if len(collision_hist) != 0:
        done = True
        reward = -200
    elif kmh < 50:
        done = False
        reward = -1
    else:
        done = False
        reward = 1

    # Hard time limit per episode, independent of the reward branch above.
    if episode_start + 100 < time.time():
        done = True

    return reward, done

In [None]:
from collections import deque
from keras.applications.xception import Xception
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam
# The functional Model class lives in keras.models; `keras.model` does not exist.
from keras.models import Model

import tensorflow as tf
import keras.backend.tensorflow_backend as backend
from threading import Thread
from tqdm import tqdm

In [None]:
# --- Replay buffer and batching -----------------------------------------------
REPLAY_MEMORY_SIZE = 5000        # maximum number of stored transitions
MIN_REPLAY_MEMORY_SIZE = 1000    # training is skipped until this many transitions exist
MINIBATCH_SIZE = 16              # transitions sampled per training call
PREDICTION_BATCH_SIZE = 1
TRAINING_BATCH_SIZE = MINIBATCH_SIZE // 4

# --- Learning schedule ----------------------------------------------------------
UPDATE_TARGET_EVERY = 5          # threshold for copying weights into the target network
# NOTE(review): this reassigns MODEL_NAME, which the vehicle-spawn cell uses as the vehicle
# model ('a2'). Re-running that cell after this one looks up the wrong blueprint.
MODEL_NAME = "Xception"
MIN_REWARD = -200                # checkpoints are saved only when min reward >= this
DISCOUNT = 0.99
EPISODES = 100

# --- Exploration ----------------------------------------------------------------
# The training loop reads `epsilon` and `EPSILON_DECAY`; neither was defined before.
epsilon = 1                      # initial exploration rate
EPSILON_DECAY = 0.95
epsilon_decay = EPSILON_DECAY    # original lowercase name kept for backward compatibility
MIN_EPSILON = 0.001

AGGREGATE_STATS_EVERY = 10       # episodes between stats aggregation

num_of_actions = 3

In [None]:
class DQNAgent:
    """Deep Q-learning agent with an online network, a target network and a replay buffer.

    The online network (`model`) is trained by `train`; the target network (`target_model`)
    supplies the future-Q estimates and is periodically overwritten with the online weights.
    `train_in_loop` is meant to run in a background thread until `terminate` is set.

    NOTE(review): `ModifiedTensorBoard` is not defined in the visible part of the notebook;
    it must be provided elsewhere and expose a `step` attribute.
    """

    def __init__(self):
        """Build both networks, the replay buffer and the logging/threading state."""
        self.model = self.create_model()
        # The target network starts as an exact copy of the online network.
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())
        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
        self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}")
        self.target_update_counter = 0
        # Captured so predict/fit calls can re-enter this graph via `with self.graph.as_default()`.
        self.graph = tf.get_default_graph()
        self.terminate = False
        self.last_logged_episode = 0
        self.training_initialized = False

    def create_model(self):
        """Return a compiled Xception-based network with one linear output per action."""
        base_model = Xception(weights=None, include_top=False, input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3))
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        predictions = Dense(num_of_actions, activation="linear")(x)
        model = Model(inputs=base_model.input, outputs=predictions)
        model.compile(loss="mse", optimizer=Adam(lr=0.001), metrics=["accuracy"])
        return model

    def update_replay_memory(self, transition):
        """Append one transition to the replay buffer.

        Args:
            transition: tuple (current_state, action, reward, new_state, done).
        """
        self.replay_memory.append(transition)

    def train(self):
        """Run one training step on a random minibatch from the replay buffer.

        Does nothing until the buffer holds at least MIN_REPLAY_MEMORY_SIZE transitions.
        The TensorBoard callback is attached only the first time a new `tensorboard.step`
        value is seen, and the target-sync counter advances on those same calls.
        """
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        # States are scaled by 1/255 before being fed to either network.
        current_states = np.array([transition[0] for transition in minibatch]) / 255
        with self.graph.as_default():
            current_qs_list = self.model.predict(current_states, PREDICTION_BATCH_SIZE)

        new_current_states = np.array([transition[3] for transition in minibatch]) / 255
        with self.graph.as_default():
            future_qs_list = self.target_model.predict(new_current_states, PREDICTION_BATCH_SIZE)

        X = []
        y = []

        for index, (current_state, action, reward, new_state, done) in enumerate(minibatch):
            # Terminal transitions have no future value; otherwise bootstrap from the target net.
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward

            # Only the taken action's Q-value is replaced; the others keep the current prediction.
            current_qs = current_qs_list[index]
            current_qs[action] = new_q

            X.append(current_state)
            y.append(current_qs)

        # Fixed: the original read `self.tensoboard` (AttributeError) and wrote
        # `self.last_log_episode`, so the gate below could never close.
        log_this_step = False
        if self.tensorboard.step > self.last_logged_episode:
            log_this_step = True
            self.last_logged_episode = self.tensorboard.step

        with self.graph.as_default():
            self.model.fit(np.array(X) / 255, np.array(y), batch_size=TRAINING_BATCH_SIZE, verbose=0,
                           shuffle=False, callbacks=[self.tensorboard] if log_this_step else None)

        if log_this_step:
            self.target_update_counter += 1
        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0

    def get_qs(self, state):
        """Return the online network's Q-values for a single state (scaled by 1/255)."""
        return self.model.predict(np.array(state).reshape(-1, *state.shape) / 255)[0]

    def train_in_loop(self):
        """Warm the network up with one fit on random data, then train until `terminate` is set."""
        X = np.random.uniform(size=(1, IMAGE_HEIGHT, IMAGE_WIDTH, 3)).astype(np.float32)
        # Target width follows the network's output size instead of a hard-coded 3.
        y = np.random.uniform(size=(1, num_of_actions)).astype(np.float32)
        with self.graph.as_default():
            self.model.fit(X, y, verbose=False, batch_size=1)

        # Signals the main thread that the first (warm-up) fit has completed.
        self.training_initialized = True

        while not self.terminate:
            self.train()
            time.sleep(0.01)

In [None]:
if __name__ == "__main__":
    # Training driver: runs EPISODES episodes against the environment while a background
    # thread keeps fitting the agent on the replay buffer.
    # NOTE(review): CarEnv is not defined in the visible part of the notebook; it is expected
    # to provide reset(), step(action) -> (state, reward, done, info), actor_list and
    # collision_hist.
    FPS = 30
    # Must be a list: it is appended to and sliced below. Seeded with one -200 entry.
    ep_rewards = [-200]
    # Exploration rate; starts fully random and is decayed after every episode.
    epsilon = 1.0
    random.seed(1)
    np.random.seed(1)
    tf.set_random_seed(1)

    os.makedirs("models", exist_ok=True)
    agent = DQNAgent()
    env = CarEnv()

    # The thread has to be started, otherwise training_initialized never flips and the
    # wait loop below spins forever.
    trainer_thread = Thread(target=agent.train_in_loop, daemon=True)
    trainer_thread.start()
    while not agent.training_initialized:
        time.sleep(0.01)

    # One throw-away prediction before the first episode, shaped like the network input
    # (the model is built from the module-level IMAGE_HEIGHT / IMAGE_WIDTH).
    agent.get_qs(np.ones((IMAGE_HEIGHT, IMAGE_WIDTH, 3)))

    for episode in tqdm(range(1, EPISODES+1), ascii=True, unit="episodes"):
        env.collision_hist = []
        agent.tensorboard.step = episode
        episode_reward = 0
        # Named episode_step so it does not overwrite the module-level step() function.
        episode_step = 1
        current_state = env.reset()
        done = False
        episode_start = time.time()

        while True:
            if np.random.random() > epsilon:
                action = np.argmax(agent.get_qs(current_state))
            else:
                action = np.random.randint(0, num_of_actions)
                # A random action takes no time to choose; sleep to keep roughly FPS steps/s.
                time.sleep(1/FPS)
            new_state, reward, done, _ = env.step(action)
            episode_reward += reward
            agent.update_replay_memory((current_state, action, reward, new_state, done))
            # Advance the state; without this every decision used the episode's first frame.
            current_state = new_state
            episode_step += 1
            if done:
                break

        # End of episode: these steps run once per episode, after the step loop has ended.
        for actor in env.actor_list:
            actor.destroy()

        ep_rewards.append(episode_reward)
        if not episode % AGGREGATE_STATS_EVERY or episode == 1:
            recent_rewards = ep_rewards[-AGGREGATE_STATS_EVERY:]
            average_reward = sum(recent_rewards)/len(recent_rewards)
            min_reward = min(recent_rewards)
            max_reward = max(recent_rewards)
            agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)

            # Save the model only when the minimum reward is at least MIN_REWARD.
            if min_reward >= MIN_REWARD:
                agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')

        # Decay epsilon (epsilon_decay is the name the hyperparameter cell defines).
        if epsilon > MIN_EPSILON:
            epsilon *= epsilon_decay
            epsilon = max(MIN_EPSILON, epsilon)

    # All episodes finished: stop the trainer thread, wait for it, then save the final model.
    agent.terminate = True
    trainer_thread.join()
    agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{min_reward:_>7.2f}min__{int(time.time())}.model')