In [1]:
import glob
import os
import sys
import random
import time
import numpy as np
import cv2
import math
from collections import deque
import matplotlib.pyplot as plt

from threading import Thread
from tqdm import tqdm

try:
    sys.path.append(glob.glob('carla-*%d.%d-%s.egg' % (
        sys.version_info.major,
        sys.version_info.minor,
        'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0])
except IndexError:
    pass

import carla
from simSync import SimulatorSynchronous

## Hyperparameters

In [2]:
FPS = 10

SHOW_PREVIEW = True
SECONDS_PER_EPISODE = 50
REPLAY_MEMORY_SIZE = 20_000
MIN_REPLAY_MEMORY_SIZE = 1_000
MINIBATCH_SIZE = 64
PREDICTION_BATCH_SIZE = 1
TRAINING_BATCH_SIZE = MINIBATCH_SIZE // 4
UPDATE_TARGET_EVERY = 5
MIN_REWARD = -4000

MODEL_NAME = "SqueezeNet"
LAST_CHECKPOINT = 0
CHECKPOINT_INTERVAL = 20*60
MODEL_CHECKPOINT = "models/SqueezeNet__29_checkpoints.pt"
LOAD_CHECKPOINT = False

[IM_H, IM_W] = [224, 224]
ACTION_SPACE_SIZE = 11
STEER_LIMIT = 0.3


EPISODES = 100000

DISCOUNT = 0.99
epsilon = 0.0
EPSILON_DECAY = 0.99 ## 0.9975 99975
MIN_EPSILON = 0.01


AGGREGATE_STATS_EVERY = 10

## Environment

In [3]:
class CarEnv:

    def __init__(self):
        self.sim = SimulatorSynchronous(fps=FPS, no_agents=1, port=2000)
        self.sim.spawn_agents()
        self.sim.agent[0].attach_controller()
        
        self.steering_options = np.linspace(-STEER_LIMIT, STEER_LIMIT, ACTION_SPACE_SIZE)
        
        self.step_no = 0
        
        self.waypoints = []
        self.wp = []
        self.waypoint_distance = 2.0
        self.reached_checkpoints = 0
        

    def reset(self):
        spawn_point = random.choice(self.sim.map.get_spawn_points())
        self.sim.agent[0].waypoint = self.sim.map.get_waypoint(spawn_point.location,
                            project_to_road=True, 
                            lane_type=(carla.LaneType.Driving)) # carla.LaneType.Sidewalk

        self.sim.agent[0].vehicle.set_transform(self.sim.agent[0].waypoint.transform)

        self.step_no = 0
        
        """
        Create Waypoints
        """
        
        self.wp = self.sim.agent[0].waypoint.next(self.waypoint_distance)[0]
        self.waypoints = []
        self.reached_checkpoints = 0
        
        for i in range(100):
            self.waypoints.append(self.wp)
            self.wp = self.wp.next(self.waypoint_distance)[0]

        
        self.sim.agent[0].controller.set_target_velocity(30)
        
        self.sim.agent[0].collision = []
        self.sim.world.tick()
        
        h, w, c = self.sim.agent[0].image.shape
        
        state = self.sim.agent[0].image[int(h/2) :, :, :]
        
        return state

        
    def step(self, action):
        
        reward = 0
        
        """
        Send control command to controller
        """
        v = self.sim.agent[0].vehicle.get_velocity()
        current_velocity = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2))
        
        accel, decel = self.sim.agent[0].controller.velocity_controller(1/FPS, current_velocity)
        
        turn = self.steering_options[action]
        
        self.sim.agent[0].vehicle.apply_control(carla.VehicleControl(throttle=accel, brake=decel, steer=turn))

        self.sim.world.tick()
        
        # [distance, reached_waypoint] = self.sim.agent[0].check_waypoint()
        
        """
        Check if we reached any checkpoint, add rewards and targets accordingly
        """
        loc = self.sim.agent[0].vehicle.get_location()
        r = self.sim.agent[0].vehicle.bounding_box.extent.z
        
        reached_wp_no = -1
        
        for i in range(100):
            way = self.waypoints[i].transform.location
            dist = np.sqrt((loc.x - way.x) ** 2 + (loc.y - way.y) ** 2 + (loc.z - way.z) ** 2)
            if dist <= r :
                reached_wp_no = i
                self.reached_checkpoints += 1
                break
        
        """
        Agent has reached one of the waypoints, update the current waypoints list and add rewards
        """
        if reached_wp_no != -1:
            reward += 100
            for i in range(reached_wp_no+1):
                self.waypoints.pop(0)
                self.wp = self.wp.next(self.waypoint_distance)[0]
                self.waypoints.append(self.wp)
        
#         else:
#             reward -= 5
        """ 
        Add penalty for any collision
        """
        collision = False
        if len(self.sim.agent[0].collision) != 0:
            reward -= 200.0
            self.sim.agent[0].collision = []
            collision = True

            
        self.step_no += 1
        
        done = True if (self.step_no >= SECONDS_PER_EPISODE*FPS or collision) else False
        
        h, w, c = self.sim.agent[0].image.shape
        
        state = self.sim.agent[0].image[int(h/2) :, :, :]
        
        return state, reward, done, None

## Model

In [4]:
import torch
import torch.nn as nn
from torch.optim import Adam
import torchvision.models as models
from torchvision import transforms

from PIL import Image

### sample execution (requires torchvision)

```
model = models.squeezenet1_1(pretrained=False, num_classes=ACTION_SPACE_SIZE)

input_image = cv2.imread("wall.jpg")
input_image = cv2.resize(input_image, (IM_H, IM_W), interpolation = cv2.INTER_AREA)
input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
input_image = Image.fromarray(input_image)

preprocess = transforms.Compose([
    # transforms.Resize(256),
    # transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

with torch.no_grad():
    output = model(input_batch)
# Tensor of shape ACTION_SPACE_SIZE, with confidence scores 
print(output[0])
# The output has unnormalized scores. To get probabilities, you can run a softmax on it.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
print(probabilities)

plt.imshow(input_tensor.permute(1, 2, 0))

```

In [5]:
preprocess = transforms.Compose([
    # transforms.Resize(256),
    # transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def process_image(img):
    img = cv2.resize(img, (IM_H, IM_W), interpolation = cv2.INTER_AREA)
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)
    input_tensor = preprocess(img)
    return input_tensor

## DQN Agent

In [6]:
class DQNAgent:
    def __init__(self):
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')
        print("Using " + str(self.device))

        self.model = models.squeezenet1_1(pretrained=False, num_classes=ACTION_SPACE_SIZE)
        self.target_model = models.squeezenet1_1(pretrained=False, num_classes=ACTION_SPACE_SIZE)
        self.target_model.load_state_dict(self.model.state_dict())
        self.model.to(self.device)
        self.target_model.to(self.device)
        pytorch_total_params = sum(p.numel() for p in self.model.parameters())
        print("Trainable parameters: " + str(pytorch_total_params))

        self.loss_function = nn.MSELoss()
        self.optimizer = Adam(self.model.parameters(), lr=0.001)

        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)

        self.target_update_counter = 0

        self.terminate = False
        self.last_logged_episode = 0
        self.training_initialized = False
        self.step = 0
        self.train_no = 0
        self.best_reward = -10000

    def update_replay_memory(self, transition):
        # transition = (current_state, action, reward, new_state, done)
        self.replay_memory.append(transition)

    def train(self):
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return
        self.train_no += 1
        
        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        current_states = torch.stack([process_image(transition[0]) for transition in minibatch])
        with torch.no_grad():
            current_qs_list = self.model(current_states.to(self.device)).cpu().detach().numpy()

        new_current_states = torch.stack([process_image(transition[3]) for transition in minibatch])
        with torch.no_grad():
            future_qs_list = self.target_model(new_current_states.to(self.device)).cpu().detach().numpy()

        X = []
        y = []

        for index, (current_state, action, reward, new_state, done) in enumerate(minibatch):
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward

            current_qs = current_qs_list[index]
            current_qs[action] = new_q

            X.append(process_image(current_state))
            y.append(current_qs)

        X = torch.stack(X)
        y = torch.Tensor(np.array(y))
        
        log_this_step = False
        if self.step > self.last_logged_episode:
            log_this_step = True
            self.last_log_episode = self.step

        """ Train and Fit Model """
        # print("Train iter no " + str(self.train_no))
        self.optimizer.zero_grad()
        y_pred = self.model(X.to(self.device)).cpu()
        y_pred = y_pred.cpu()
        loss = self.loss_function(y_pred, y)
        loss.backward()
        self.optimizer.step()

        if log_this_step:
            self.target_update_counter += 1

        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.load_state_dict(self.model.state_dict())
            self.target_update_counter = 0

    def get_qs(self, state):
        state = process_image(state).unsqueeze(0) # convert image to tensor of size 1 x 3 x IM_H x IM_W
        with torch.no_grad():
            prediction = self.target_model(state.to(self.device)).cpu().detach().numpy()
        return prediction[0]

    def train_in_loop(self):
        self.training_initialized = True

        while True:
            if self.terminate:
                return
            self.train()

## Start Training

In [7]:
# whoop up the carla server
os.system('cmd /k "\"F:\Autonomous Vehicles\CARLA-SImulator\CARLA_0.9.11_windows\CarlaUE4.exe\" -carla-port=2000 -quality-level=Epic"')
time.sleep(5.0)

In [None]:
print("Beginning Simulation")

# For more repetitive results
random.seed(1)
np.random.seed(1)
torch.random.manual_seed(1)

# Create models folder
if not os.path.isdir('models'):
    os.makedirs('models')

# Create agent and environment
agent = DQNAgent()
env = CarEnv()

if LOAD_CHECKPOINT is True:
    agent.model.load_state_dict(torch.load(MODEL_CHECKPOINT))
    agent.target_model.load_state_dict(agent.model.state_dict())

# Start training thread and wait for training to be initialized
trainer_thread = Thread(target=agent.train_in_loop, daemon=True)
trainer_thread.start()
while not agent.training_initialized:
    time.sleep(0.01)

ep_rewards = []
avg_rewards = []
max_rewards = []
min_rewards = []

# Iterate over episodes
for episode in range(1, EPISODES + 1):

    # Update tensorboard step every episode
    agent.step = episode

    # Restarting episode - reset episode reward and step number
    episode_reward = 0
    step = 1

    # Reset environment and get initial state
    current_state = env.reset()

    # Reset flag and start iterating until episode ends
    done = False
    episode_start = time.time()

    # Play for given number of seconds only
    while True:

        # This part stays mostly the same, the change is to query a model for Q values
        if np.random.random() > epsilon:
            # Get action from Q table
            action = np.argmax(agent.get_qs(current_state))
        else:
            # Get random action
            action = np.random.randint(0, ACTION_SPACE_SIZE)
            # This takes no time, so we add a delay matching 60 FPS (prediction above takes longer)
            time.sleep(1/FPS)

        new_state, reward, done, _ = env.step(action)
        if  SHOW_PREVIEW:
            # print(reward)
            cv2.imshow("Agent Cam", new_state)
            cv2.waitKey(1)
        
        # Transform new continous state to new discrete state and count reward
        episode_reward += reward

        # Every step we update replay memory
        agent.update_replay_memory((current_state, action, reward, new_state, done))

        current_state = new_state
        step += 1

        if done or (episode_reward <= MIN_REWARD):
            break

    # End of episode 

    # Append episode reward to a list and log stats (every given number of episodes)
    ep_rewards.append(episode_reward)

    print(f'Survived {step/FPS:_>.2f} sec, checkpoints {env.reached_checkpoints}, episode {episode} of {EPISODES}, train iter {agent.train_no}')
    
    # Save model, but only when min reward is greater or equal a set value
    if episode_reward >= agent.best_reward or (time.time() - LAST_CHECKPOINT) > CHECKPOINT_INTERVAL:
        LAST_CHECKPOINT = time.time()
        if episode_reward >= agent.best_reward:
            agent.best_reward = episode_reward
        torch.save(agent.model.state_dict(),
                   # f'models/{MODEL_NAME}__{episode_reward:_>7.2f}reward_{int(time.time())}.pt')
                   f'models/{MODEL_NAME}__{env.reached_checkpoints}_checkpoints.pt')

# Decay epsilon
    if epsilon > MIN_EPSILON:
        epsilon *= EPSILON_DECAY
        epsilon = max(MIN_EPSILON, epsilon)

    """
    Save Data to text file
    """
    file_object = open('logs/rewards.txt', 'a')
    file_object.write(str(episode_reward))
    file_object.write(', ')
    file_object.close()
    
    file_object = open('logs/checkpoints.txt', 'a')
    file_object.write(str(env.reached_checkpoints))
    file_object.write(', ')
    file_object.close()
    

if SHOW_PREVIEW:    
    cv2.destroy_all_windows()
# Set termination flag for training thread and wait for it to finish
agent.terminate = True
trainer_thread.join()

print("Finished training model")


Beginning Simulation
Using cuda
Trainable parameters: 728139
Establishing Connection to Server
Probably connected not sure tho
Vehicle 0 spawned
Survived 8.00 sec, checkpoints 1, episode 1 of 100000, train iter 0
Survived 50.10 sec, checkpoints 1, episode 2 of 100000, train iter 0
Survived 6.50 sec, checkpoints 1, episode 3 of 100000, train iter 0
Survived 6.20 sec, checkpoints 1, episode 4 of 100000, train iter 0
Survived 7.50 sec, checkpoints 1, episode 5 of 100000, train iter 0
Survived 5.80 sec, checkpoints 1, episode 6 of 100000, train iter 0
Survived 40.80 sec, checkpoints 0, episode 7 of 100000, train iter 7
Survived 4.70 sec, checkpoints 2, episode 8 of 100000, train iter 8
Survived 11.30 sec, checkpoints 2, episode 9 of 100000, train iter 11
Survived 5.40 sec, checkpoints 2, episode 10 of 100000, train iter 12
Survived 3.20 sec, checkpoints 1, episode 11 of 100000, train iter 13
Survived 4.30 sec, checkpoints 2, episode 12 of 100000, train iter 14
Survived 4.00 sec, checkpoint

Survived 4.00 sec, checkpoints 1, episode 115 of 100000, train iter 254
Survived 4.80 sec, checkpoints 1, episode 116 of 100000, train iter 255
Survived 10.30 sec, checkpoints 1, episode 117 of 100000, train iter 257
Survived 7.90 sec, checkpoints 1, episode 118 of 100000, train iter 260
Survived 2.00 sec, checkpoints 1, episode 119 of 100000, train iter 260
Survived 50.10 sec, checkpoints 2, episode 120 of 100000, train iter 273
Survived 4.20 sec, checkpoints 1, episode 121 of 100000, train iter 274
Survived 7.90 sec, checkpoints 1, episode 122 of 100000, train iter 276
Survived 5.30 sec, checkpoints 1, episode 123 of 100000, train iter 277
Survived 4.90 sec, checkpoints 1, episode 124 of 100000, train iter 278
Survived 4.10 sec, checkpoints 1, episode 125 of 100000, train iter 279
Survived 50.10 sec, checkpoints 1, episode 126 of 100000, train iter 292
Survived 6.60 sec, checkpoints 1, episode 127 of 100000, train iter 294
Survived 50.10 sec, checkpoints 1, episode 128 of 100000, tra

Survived 5.00 sec, checkpoints 1, episode 229 of 100000, train iter 693
Survived 4.70 sec, checkpoints 1, episode 230 of 100000, train iter 694
Survived 9.90 sec, checkpoints 1, episode 231 of 100000, train iter 696
Survived 6.30 sec, checkpoints 1, episode 232 of 100000, train iter 698
Survived 3.20 sec, checkpoints 1, episode 233 of 100000, train iter 699
Survived 3.40 sec, checkpoints 1, episode 234 of 100000, train iter 699
Survived 1.90 sec, checkpoints 1, episode 235 of 100000, train iter 700
Survived 50.10 sec, checkpoints 1, episode 236 of 100000, train iter 712
Survived 5.90 sec, checkpoints 1, episode 237 of 100000, train iter 714
Survived 3.50 sec, checkpoints 1, episode 238 of 100000, train iter 715
Survived 4.60 sec, checkpoints 1, episode 239 of 100000, train iter 716
Survived 5.70 sec, checkpoints 1, episode 240 of 100000, train iter 718
Survived 7.00 sec, checkpoints 1, episode 241 of 100000, train iter 719
Survived 3.00 sec, checkpoints 1, episode 242 of 100000, train 

Survived 2.60 sec, checkpoints 2, episode 343 of 100000, train iter 1127
Survived 4.20 sec, checkpoints 1, episode 344 of 100000, train iter 1128
Survived 39.80 sec, checkpoints 1, episode 345 of 100000, train iter 1137
Survived 5.30 sec, checkpoints 1, episode 346 of 100000, train iter 1139
Survived 6.00 sec, checkpoints 1, episode 347 of 100000, train iter 1140
Survived 50.10 sec, checkpoints 2, episode 348 of 100000, train iter 1153
Survived 50.10 sec, checkpoints 1, episode 349 of 100000, train iter 1166
Survived 7.70 sec, checkpoints 1, episode 350 of 100000, train iter 1168
Survived 3.00 sec, checkpoints 1, episode 351 of 100000, train iter 1169
Survived 4.90 sec, checkpoints 1, episode 352 of 100000, train iter 1170
Survived 6.10 sec, checkpoints 1, episode 353 of 100000, train iter 1171
Survived 5.10 sec, checkpoints 1, episode 354 of 100000, train iter 1172
Survived 3.80 sec, checkpoints 1, episode 355 of 100000, train iter 1173
Survived 9.50 sec, checkpoints 1, episode 356 of