In [None]:
# Autonomous Driving Project
# This notebook contains the implementation of the autonomous driving project using DQN and PPO algorithms.

In [None]:
!pip install future==0.18.3 numpy==1.21.1 pygame==2.1.2 Pillow==9.4.0 poetry==1.3.2

Collecting future==0.18.3
  Downloading future-0.18.3.tar.gz (840 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/840.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.9/840.9 kB[0m [31m26.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting numpy==1.21.1
  Downloading numpy-1.21.1.zip (10.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.3/10.3 MB[0m [31m65.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pygame==2.1.2
  Downloading pygame-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting Pillow==9.4.0
  Downloading Pillow-9.4.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.3 kB)
Collecting poetry==1.3.2
  Downl

In [None]:

# parameters.py
# Central configuration constants shared by the training scripts.

# ---------------------------------------------------------------------------
# General settings
# ---------------------------------------------------------------------------
MODEL_LOAD = False   # default for the --load-checkpoint option
SEED = 0             # default for the --seed option
BATCH_SIZE = 1
IM_WIDTH, IM_HEIGHT = 160, 80
GAMMA = 0.99
MEMORY_SIZE = 5000
EPISODES = 1000      # default for --total-episodes and bound of the training loop

# ---------------------------------------------------------------------------
# VAE bottleneck
# ---------------------------------------------------------------------------
LATENT_DIM = 95      # passed to EncodeState when the driver builds the encoder

# ---------------------------------------------------------------------------
# Dueling DQN (hyper)parameters
# ---------------------------------------------------------------------------
DQN_LEARNING_RATE = 1e-4   # default for the --learning-rate option
EPSILON = 1.0
EPSILON_END = 0.05
EPSILON_DECREMENT = 1e-5

REPLACE_NETWORK = 5
DQN_CHECKPOINT_DIR = 'preTrained_models/ddqn'
MODEL_ONLINE = 'carla_dueling_dqn_online.pth'
MODEL_TARGET = 'carla_dueling_dqn_target.pth'

# ---------------------------------------------------------------------------
# Proximal Policy Optimization (hyper)parameters
# ---------------------------------------------------------------------------
EPISODE_LENGTH = 7500
TOTAL_TIMESTEPS = 2_000_000.0   # kept as a float, same value as 2e6
ACTION_STD_INIT = 0.2
TEST_TIMESTEPS = 50_000.0       # kept as a float, same value as 5e4
PPO_LEARNING_RATE = 0.0001
PPO_CHECKPOINT_DIR = 'preTrained_models/ppo/'
POLICY_CLIP = 0.2


In [None]:

# encoder_init.py
import torch
from autoencoder.encoder import VariationalEncoder

class EncodeState():
    """Turn a raw environment observation into a single flat state tensor.

    A ``VariationalEncoder`` is built once, its weights are loaded via
    ``load()``, it is put in eval mode and all of its parameters are frozen.
    ``process`` then runs the image part of an observation through it and
    appends the navigation part.
    """

    def __init__(self, latent_dim):
        """Build, load and freeze the encoder.

        Args:
            latent_dim: Value forwarded to the ``VariationalEncoder`` constructor.
        """
        self.latent_dim = latent_dim
        # Prefer the GPU when one is visible to torch, otherwise stay on CPU.
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)

        self.conv_encoder = VariationalEncoder(self.latent_dim).to(self.device)
        self.conv_encoder.load()
        self.conv_encoder.eval()

        # The encoder is used for inference only: freeze every parameter.
        for weight in self.conv_encoder.parameters():
            weight.requires_grad = False

    def process(self, observation):
        """Encode one observation into a 1-D tensor.

        Args:
            observation: Indexable pair; element 0 is the image data and
                element 1 the navigation values (presumably an H x W x C
                image and a flat vector -- TODO confirm against the
                environment).

        Returns:
            A 1-D tensor: the flattened encoder output followed by the
            navigation values.
        """
        frame = torch.tensor(observation[0], dtype=torch.float).to(self.device)
        # Add a leading batch axis, then reverse the order of the other three axes.
        frame = frame.unsqueeze(0).permute(0, 3, 2, 1)
        latent = self.conv_encoder(frame).view(-1)

        navigation = torch.tensor(observation[1], dtype=torch.float).to(self.device)
        return torch.cat((latent, navigation), -1)


ModuleNotFoundError: No module named 'autoencoder'

In [None]:

# discrete_driver.py
import os
import sys
import time
import random
import numpy as np
import argparse
import logging
import pickle
import torch
from distutils.util import strtobool
from datetime import datetime
from torch.utils.tensorboard import SummaryWriter
from simulation.connection import ClientConnection
from simulation.environment import CarlaEnvironment
from networks.off_policy.ddqn.agent import DQNAgent
from encoder_init import EncodeState
from parameters import *

def _str_to_bool(value):
    """Convert a command-line token such as 'true', 'no' or '1' into a bool.

    Accepts the same spellings as ``distutils.util.strtobool`` (y/yes/t/true/on/1
    and n/no/f/false/off/0, case-insensitive) without depending on ``distutils``,
    which is no longer part of the standard library from Python 3.12 on.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('y', 'yes', 't', 'true', 'on', '1'):
        return True
    if token in ('n', 'no', 'f', 'false', 'off', '0'):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def parse_args(argv=None):
    """Parse the command-line options of the discrete (DDQN) driver.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) keeps the
            previous behaviour of reading ``sys.argv[1:]``. Pass an explicit list
            (e.g. ``['--exp-name', 'ddqn']``) when calling from a notebook, where
            ``sys.argv`` holds the kernel's own flags rather than yours.

    Returns:
        argparse.Namespace with ``exp_name``, ``env_name``, ``learning_rate``,
        ``seed``, ``total_episodes``, ``train``, ``town``, ``load_checkpoint``,
        ``torch_deterministic`` and ``cuda``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp-name', type=str, help='name of the experiment')
    parser.add_argument('--env-name', type=str, default='carla', help='name of the simulation environment')
    parser.add_argument('--learning-rate', type=float, default=DQN_LEARNING_RATE, help='learning rate of the optimizer')
    parser.add_argument('--seed', type=int, default=SEED, help='seed of the experiment')
    parser.add_argument('--total-episodes', type=int, default=EPISODES, help='total number of training episodes')
    # `type=bool` would turn every non-empty string (including "False") into True,
    # so the boolean options all share the explicit string-to-bool converter.
    parser.add_argument('--train', type=_str_to_bool, default=True, nargs='?', const=True, help='is it training?')
    parser.add_argument('--town', type=str, default="Town07", help='which town do you like?')
    parser.add_argument('--load-checkpoint', type=_str_to_bool, default=MODEL_LOAD, nargs='?', const=True, help='resume training?')
    parser.add_argument('--torch-deterministic', type=_str_to_bool, default=True, nargs='?', const=True, help='if toggled, `torch.backends.cudnn.deterministic=False`')
    parser.add_argument('--cuda', type=_str_to_bool, default=True, nargs='?', const=True, help='if toggled, cuda will not be enabled by default')
    args = parser.parse_args(argv)

    return args

def runner():
    """Run the discrete-action (Dueling DQN) training loop against CARLA.

    Steps, in order: parse the CLI options, open a TensorBoard writer, seed the
    RNGs, build the agent (optionally restoring a checkpoint), connect to the
    simulator, optionally refill the replay buffer, then train for
    ``--total-episodes`` episodes, saving the model and logging every 10 episodes.

    Raises:
        SystemExit: always on completion (status 0), or with status 1 when
            ``--exp-name`` is not ``'ddqn'``.
        ConnectionRefusedError: when the simulator connection cannot be set up.
        Exception: any error raised during setup or training now propagates
            instead of being replaced by a silent exit.
    """
    #========================================================================
    #                           BASIC PARAMETER & LOGGING SETUP
    #========================================================================

    args = parse_args()
    exp_name = args.exp_name

    # Only the DDQN experiment is implemented here. Fail with a clear message
    # instead of hitting an undefined `run_name` further down.
    if exp_name != 'ddqn':
        print(f"Unsupported experiment name {exp_name!r}; this driver only implements 'ddqn'.")
        sys.exit(1)
    run_name = "DDQN"

    town = args.town
    checkpoint_path = f'checkpoints/DDQN/{town}/checkpoint_ddqn.pickle'
    writer = SummaryWriter(f"runs/{run_name}/{town}")

    try:
        writer.add_text(
            "hyperparameters",
            "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}" for key, value in vars(args).items()])))

        # Seeding to reproduce the results
        random.seed(args.seed)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.backends.cudnn.deterministic = args.torch_deterministic

        #========================================================================
        #                           INITIALIZING THE NETWORK
        #========================================================================

        checkpoint_load = args.load_checkpoint
        n_actions = 7  # Car can only make 7 actions
        agent = DQNAgent(n_actions)

        epoch = 0
        cumulative_score = 0
        episodic_length = []
        scores = []
        deviation_from_center = 0
        distance_covered = 0

        if checkpoint_load:
            agent.load_model()
            if exp_name == 'ddqn':
                # NOTE(security): pickle.load can execute arbitrary code; only load
                # checkpoint files that were written by this script.
                with open(checkpoint_path, 'rb') as f:
                    data = pickle.load(f)
                epoch = data['epoch']
                cumulative_score = data['cumulative_score']
                agent.epsilon = data['epsilon']

        #========================================================================
        #                           CREATING THE SIMULATION
        #========================================================================

        try:
            client, world = ClientConnection(town).setup()
            logging.info("Connection has been setup successfully.")
        except Exception as exc:
            logging.error("Connection has been refused by the server.")
            # Actually raise: without a client/world nothing below can work.
            raise ConnectionRefusedError("Connection has been refused by the server.") from exc

        env = CarlaEnvironment(client, world, town, continuous_action=False)
        encode = EncodeState(LATENT_DIM)

        time.sleep(1)
        #========================================================================
        #                           INITIALIZING THE MEMORY
        #========================================================================

        # When resuming, refill the replay buffer with random-action transitions
        # until its counter reaches the buffer size.
        if exp_name == 'ddqn' and checkpoint_load:
            while agent.replay_buffer.counter < agent.replay_buffer.buffer_size:
                observation = env.reset()
                observation = encode.process(observation)
                done = False
                while not done:
                    action = random.randint(0, n_actions - 1)
                    new_observation, reward, done, _ = env.step(action)
                    new_observation = encode.process(new_observation)
                    agent.save_transition(observation, action, reward, new_observation, int(done))
                    observation = new_observation

        if args.train:
            #========================================================================
            #                           ALGORITHM
            #========================================================================

            # Honour --total-episodes (its default is EPISODES).
            for step in range(epoch + 1, args.total_episodes + 1):
                if exp_name == 'ddqn':
                    print('Starting Episode: ', step, ', Epsilon Now:  {:.3f}'.format(agent.epsilon), ', ', end="")

                # Reset
                done = False
                observation = env.reset()
                observation = encode.process(observation)
                current_ep_reward = 0

                # Episode start: timestamp
                t1 = datetime.now()

                while not done:
                    action = agent.get_action(observation)
                    new_observation, reward, done, info = env.step(action)
                    if new_observation is None:
                        break
                    new_observation = encode.process(new_observation)
                    current_ep_reward += reward

                    agent.save_transition(observation, action, reward, new_observation, int(done))
                    agent.learn()

                    observation = new_observation

                # Episode end : timestamp
                t2 = datetime.now()
                t3 = t2 - t1
                episodic_length.append(abs(t3.total_seconds()))

                # `info` comes from the last env.step of the episode; presumably
                # info[0] is distance covered and info[1] deviation from centre
                # (inferred from the accumulators) -- TODO confirm.
                deviation_from_center += info[1]
                distance_covered += info[0]

                scores.append(current_ep_reward)

                if checkpoint_load:
                    # Running mean that continues from the restored average.
                    cumulative_score = ((cumulative_score * (step - 1)) + current_ep_reward) / (step)
                else:
                    cumulative_score = np.mean(scores)

                print('Reward:  {:.2f}'.format(current_ep_reward), ', Average Reward:  {:.2f}'.format(cumulative_score))

                # Every 10th episode: persist the model/checkpoint and log metrics.
                if step % 10 == 0:
                    agent.save_model()

                    if exp_name == 'ddqn':
                        data_obj = {'cumulative_score': cumulative_score, 'epsilon': agent.epsilon, 'epoch': step}
                        # The checkpoint directory may not exist on a fresh run.
                        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
                        with open(checkpoint_path, 'wb') as handle:
                            pickle.dump(data_obj, handle)

                    writer.add_scalar("Cumulative Reward/info", cumulative_score, step)
                    writer.add_scalar("Epsilon/info", agent.epsilon, step)
                    writer.add_scalar("Episodic Reward/episode", scores[-1], step)
                    # Mean over the last ten episodes (slice, not a single element).
                    writer.add_scalar("Average Episodic Reward/info", np.mean(scores[-10:]), step)
                    writer.add_scalar("Episode Length (s)/info", np.mean(episodic_length), step)
                    writer.add_scalar("Average Deviation from Center/episode", deviation_from_center / 10, step)
                    writer.add_scalar("Average Distance Covered (m)/episode", distance_covered / 10, step)

                    episodic_length = []
                    deviation_from_center = 0
                    distance_covered = 0

            print("Terminating the run.")

    finally:
        # Flush pending TensorBoard events. Deliberately no sys.exit() here:
        # exiting inside `finally` would replace any in-flight exception with
        # SystemExit and hide real failures.
        writer.close()

    # Preserve the original contract: a completed run ends the process.
    sys.exit()

# Execute the runner only when this file is run directly (as a script or in a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    runner()

