In [None]:
import os
import io
import cv2
import math
import time
import json
import carla
import heapq
import torch
# import pickle
import random
import requests
import kagglehub
import transformers

import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F

from PIL import Image
from torchvision import models
from collections import Counter
from sklearn.decomposition import PCA
from skimage.morphology import skeletonize
from torch.optim.lr_scheduler import StepLR
from huggingface_hub import hf_hub_download
from transformers.image_transforms import rgb_to_id
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, Subset
from transformers import SegformerForSemanticSegmentation, SegformerConfig, CLIPProcessor, CLIPModel, AutoModelForSeq2SeqLM, AutoTokenizer, AutoImageProcessor, DetrForObjectDetection, DetrForSegmentation, TimeSeriesTransformerForPrediction, TimeSeriesTransformerConfig, pipeline

from torchvision import transforms as A

In [None]:
# Restrict the transformers library's logging to error-level messages only.
transformers.logging.set_verbosity_error()

**ALGORITHM 1: VL pretrained-M fine tuned with semi-disentangled outputs**

Dataset Creation (IMAGE to EMBEDDING)

In [None]:
# path = kagglehub.dataset_download("albertozorzetto/carla-densely-annotated-driving-dataset")
# print("Path to dataset files:", path)

In [None]:
# # Download the CARLA dataset using the Kaggle API
# dataset_name = "albertozorzetto/carla-densely-annotated-driving-dataset"
# destination_path = "/content/datasets"

# import os
# # Install Kaggle API
# !pip install kaggle --upgrade
# # Upload kaggle.json for authentication
# from google.colab import files
# uploaded = files.upload()
# # Move kaggle.json to the proper directory
# !mkdir -p ~/.kaggle
# !mv kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json
# # Verify Kaggle is set up correctly
# !kaggle datasets list
# !kaggle datasets download -d {dataset_name} -p {destination_path} --unzip
# print("Path to dataset files:", destination_path)

# import tarfile
# images_path = os.path.join(destination_path, "images")
# labels_path = os.path.join(destination_path, "labels")
# # Create directories for extracted files
# os.makedirs(images_path, exist_ok=True)
# os.makedirs(labels_path, exist_ok=True)
# # Extract and manage .tar files
# tar_files = [f for f in os.listdir(destination_path) if f.endswith('.tar')]
# for tar_file in tar_files:
#     tar_path = os.path.join(destination_path, tar_file)
#     print(f"Extracting {tar_path}...")
#     with tarfile.open(tar_path) as tar:
#         # Determine the folder to extract based on the tar file name
#         if "images" in tar_file.lower():
#             tar.extractall(path=images_path)
#         elif "labels" in tar_file.lower():
#             tar.extractall(path=labels_path)
#         else:
#             print(f"Skipping unknown .tar file: {tar_file}")
#     os.remove(tar_path)  # Optional: Remove the .tar file after extraction
# print("All .tar files have been extracted.")
# print(f"Images folder: {images_path}")
# print(f"Labels folder: {labels_path}")

In [None]:
# CLIP preprocessor; SegmentationDataset uses it to obtain `pixel_values` tensors.
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
# Class names passed as the `text=` argument whenever `processor` is called.
semantic_label_classes = ["Traffic Sign", "Building", "Fence", "Other", "Pedestrian", "Pole", "Road Line", "Road", "Sidewalk", "Vegetation", "Car", "Wall", "Unlabeled"]

# Dataset folders and output file names, relative to the working directory.
root = "datasets"
images_dir = f"{root}/images"
labels_dir = f"{root}/labels"
waypoint_file = "dataset_waypoints.json"
bev_file = "dataset_bev.json"
batch_size = 2

# Frame stride: sorted folder listings are subsampled with `[::skip]`.
skip = 10

*Saving the {way points} for passing to the LLM*

In [None]:
def get_mid_waypoint(waypoints, image_height=600):
    """Return the centroid of ``waypoints`` with the y axis flipped.

    Despite the name, this is the arithmetic mean of all points, not the
    middle element of the sequence.

    Args:
        waypoints: Sequence of points; index 0 of each point is x and index 1
            is y.
        image_height: Value the mean y is subtracted from to flip the vertical
            axis. Defaults to 600, the constant previously hard-coded here.

    Returns:
        ``[mean_x, image_height - mean_y]``, or ``None`` when ``waypoints`` is
        empty.
    """
    count = len(waypoints)
    if count == 0:
        return None

    mean_x = sum(wp[0] for wp in waypoints) / count
    mean_y = sum(wp[1] for wp in waypoints) / count

    return [mean_x, image_height - mean_y]

In [None]:
def generate_waypoints(label, num_waypoints):
    """Derive a single target waypoint from a semantic label image.

    Pixels whose (grayscale) value equals 90 are treated as the road mask.
    The mask is skeletonized, up to 25 skeleton pixels are sampled at evenly
    spaced indices, and their centroid is returned via ``get_mid_waypoint``.

    Args:
        label: Label image as a NumPy array; 3-D arrays are converted with
            ``cv2.COLOR_RGB2GRAY`` first.
        num_waypoints: Accepted for interface compatibility but not used. The
            original code sampled with it and immediately overwrote the result
            with the fixed 25-point sampling kept here.
            NOTE(review): confirm whether the 25 cap or ``num_waypoints`` is
            the intended sample count.

    Returns:
        The value returned by ``get_mid_waypoint`` for the sampled points, or
        an empty list when no road pixels / skeleton pixels are found.
    """
    if len(label.shape) == 3:
        label = cv2.cvtColor(label, cv2.COLOR_RGB2GRAY)

    road_mask = (label == 90).astype(np.uint8)

    # The contours are only used as an "is there any road at all" check.
    contours, _ = cv2.findContours(road_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return []

    skeleton = skeletonize(road_mask).astype(np.uint8)

    y_coords, x_coords = np.where(skeleton > 0)
    path_points = np.column_stack((x_coords, y_coords))
    if len(path_points) == 0:
        return []

    num_selected_waypoints = min(25, len(path_points))
    sampled_indices = np.linspace(0, len(path_points) - 1, num_selected_waypoints, dtype=int)

    return get_mid_waypoint(path_points[sampled_indices])


In [None]:
def process_dataset(labels_dir, output_dir, num_waypoints, num_videos=28):
    """Compute a waypoint for every ``skip``-th label image and append it to disk.

    Each record is written to ``output_dir/waypoint_file`` as its own line
    holding a one-element JSON list, the same layout
    ``prepare_t1_t2_bev_dataset`` produces. The previous implementation
    re-dumped the ever-growing list of all records once per folder, so earlier
    records were duplicated on every later line.

    The output file is opened in append mode: delete it before re-running,
    otherwise records are written twice.

    Args:
        labels_dir: Directory containing ``Video_000`` ... sub-folders of PNG
            label images.
        output_dir: Directory of the output file; also used as the prefix of
            the ``"label"`` path stored in each record.
        num_waypoints: Forwarded to ``generate_waypoints``.
        num_videos: Number of ``Video_XXX`` folders to process (default 28).
    """
    output_file = os.path.join(output_dir, waypoint_file)

    for i in range(num_videos):
        curr_folder_labels = f"{labels_dir}/Video_{i:03d}"
        label_files = sorted(file for file in os.listdir(curr_folder_labels) if file.endswith(".png"))
        label_files = label_files[::skip]

        with open(output_file, 'a') as wf:
            for label_file in label_files:
                # Context manager closes the image file handle promptly.
                with Image.open(os.path.join(curr_folder_labels, label_file)) as label:
                    label_np = np.array(label)
                waypoints = generate_waypoints(label_np, num_waypoints=num_waypoints)
                record = {
                    "label": os.path.join(output_dir, label_file),
                    "waypoints": waypoints
                }
                json.dump([record], wf)
                wf.write("\n")

        print(f"\rProgress: {((i + 1) / num_videos * 100):.2f} %", end = "")

    print(f"\nDone")

In [None]:
# Generate the waypoint file under `root`; 1000 is passed as `num_waypoints`.
process_dataset(labels_dir, root, 1000)

getting dataset

In [None]:
class SegmentationDataset(Dataset):
    """Pairs CLIP-preprocessed images with the waypoints stored on disk.

    Items are ``(pixel_values, waypoints)`` tuples, where ``pixel_values`` is
    the tensor returned by ``processor`` for one image and ``waypoints`` is
    the value stored under ``"waypoints"`` in ``root/waypoint_file``.

    Changes from the previous implementation:
      * the ``image_dir`` argument is honoured (the global ``images_dir`` was
        read instead);
      * the waypoint file is read once instead of once per video folder;
      * pairs are built once, so ``self.data`` no longer contains the same
        pair many times (duplicates also leaked between train and val splits);
      * every record of the file is used, skipping records whose ``"label"``
        path was already seen, so both cumulative and one-record-per-line
        files are read correctly.

    NOTE(review): images are sampled every ``skip * 10`` files while the
    waypoint file is written every ``skip`` label files, and pairing is purely
    positional. Confirm the intended image/label alignment.
    NOTE(review): de-duplication assumes label file names are unique across
    video folders; verify against the dataset.
    """

    def __init__(self, image_dir, labels_dir):
        # `labels_dir` is accepted for interface compatibility; it is not read.
        self.data = []

        pixel_inputs = []
        for video_idx in range(0, 28):
            curr_folder_images = f"{image_dir}/Video_{video_idx:03d}"
            image_files = sorted(file for file in os.listdir(curr_folder_images) if file.endswith(".png"))

            for image_file in image_files[::skip * 10]:
                # Preprocess inside the `with` so the file is read before it is closed.
                with Image.open(os.path.join(curr_folder_images, image_file)) as image:
                    encoded = processor(text=semantic_label_classes, images=image, return_tensors="pt", padding=True)
                pixel_inputs.append(encoded['pixel_values'])

        targets = []
        seen_labels = set()
        with open(os.path.join(root, waypoint_file), "r") as wf:
            for line in wf:
                if not line.strip():
                    continue
                for record in json.loads(line):
                    if record['label'] in seen_labels:
                        continue
                    seen_labels.add(record['label'])
                    targets.append(record['waypoints'])

        # zip() truncates to the shorter of the two lists.
        self.data = list(zip(pixel_inputs, targets))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        embedding_inputs, embedding_labels = self.data[idx]
        return embedding_inputs, embedding_labels

In [None]:
# Split indices for train and validation
dataset = SegmentationDataset(images_dir, labels_dir)
dataloader = DataLoader(dataset, batch_size = batch_size, shuffle = True, pin_memory=True)

train_indices, val_indices = train_test_split(range(len(dataset)), test_size=0.2, random_state=42)
train_dataset = torch.utils.data.Subset(dataset, train_indices)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)

val_dataset = torch.utils.data.Subset(dataset, val_indices)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

In [None]:
def normalize(pixel_values):
    """Min-max scale ``pixel_values`` into the range [0, 1].

    Args:
        pixel_values: Array-like supporting ``.min()``, ``.max()`` and
            element-wise arithmetic (e.g. a torch tensor or NumPy array).

    Returns:
        ``(pixel_values - min) / (max - min)``. When every element is equal
        (``max == min``) an all-zero result of the same shape is returned
        instead of the NaNs the unguarded division produced.
    """
    mini = pixel_values.min()
    maxi = pixel_values.max()
    value_range = maxi - mini
    if value_range == 0:
        # Constant input: avoid 0/0; the multiplication keeps a float result.
        return (pixel_values - mini) * 0.0
    return (pixel_values - mini) / value_range

OUR VIT WP MODEL

In [None]:
class VitWPModel(nn.Module):
    """Convolutional regressor mapping an image tensor to two output values.

    Five conv -> ReLU -> batch-norm -> 2x2 max-pool stages are followed by
    two fully connected layers. ``fc1`` expects ``64 * 7 * 7`` features, so
    with five halvings the spatial input size must be 224 x 224.
    """

    def __init__(self):
        super().__init__()
        # Channel widths of the five stages. Layers are registered as
        # conv1/bn1 ... conv5/bn5, in that order.
        channel_plan = [3, 32, 512, 128, 512, 64]
        for stage, (in_ch, out_ch) in enumerate(zip(channel_plan, channel_plan[1:]), start=1):
            setattr(self, f"conv{stage}", nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1))
            setattr(self, f"bn{stage}", nn.BatchNorm2d(out_ch))

        self.fc1 = nn.Linear(64 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, 2)

        self.dropout = nn.Dropout(0.25)

    def forward(self, pixel_values):
        """Run the network.

        Returns the ``fc2`` output with all size-1 dimensions squeezed away,
        so a batch of one yields shape ``(2,)`` rather than ``(1, 2)``.
        """
        features = pixel_values
        for stage in range(1, 6):
            conv = getattr(self, f"conv{stage}")
            bn = getattr(self, f"bn{stage}")
            features = torch.max_pool2d(bn(torch.relu(conv(features))), 2)

        flat = features.view(features.size(0), -1)
        hidden = self.dropout(torch.relu(self.fc1(flat)))

        return self.fc2(hidden).squeeze()

In [None]:
# Instantiate the Algorithm 1 network with freshly initialised weights.
model_to_train = VitWPModel()

*--training set ups--*

In [None]:
def wp_generation(pixel_values, model):
    """Drop dimension 1 of ``pixel_values`` if it has size 1, then run ``model``.

    Returns whatever ``model`` returns for the squeezed tensor.
    """
    return model(pixel_values.squeeze(1))

In [None]:
def training(model_to_train, train_loader, optimizer, criterion):
    """Train ``model_to_train`` for one pass over ``train_loader``.

    Each batch is ``(pixel_values, waypoints)``. When ``waypoints`` arrives as
    a list of per-coordinate tensors (what PyTorch's default collate produces
    for per-sample ``[x, y]`` lists), the tensors are stacked into a
    ``(batch, coords)`` target.

    The previous code used ``way_p[0]`` (the x-coordinates of every sample in
    the batch) as the target for the ``[x, y]`` prediction of the first image
    only, so the loss compared unrelated quantities and the rest of the batch
    was ignored.

    Returns:
        Mean loss per batch.
    """
    model_to_train.train()
    running_loss = 0.0

    for data in train_loader:
        pixel_values = data[0]
        way_p = data[1]
        if isinstance(way_p, (list, tuple)):
            target = torch.stack(list(way_p), dim=1)
        else:
            target = way_p
        target = target.float()

        optimizer.zero_grad()

        # The model squeezes its output, so a batch of one comes back as
        # shape (coords,); reshape to line up with the target.
        outputs = wp_generation(pixel_values, model_to_train).reshape(target.shape)
        loss = criterion(outputs, target)
        running_loss += loss.item()

        loss.backward()
        optimizer.step()

    return running_loss / len(train_loader)

In [None]:
def validation(model_to_validate, val_loader, optimizer, criterion):
    """Evaluate ``model_to_validate`` on ``val_loader`` without gradient tracking.

    Uses the whole batch and an ``(batch, coords)`` target. The previous code
    compared the first image's ``[x, y]`` prediction with ``way_p[0]``, which
    holds the x-coordinates of all samples in the batch.

    Args:
        optimizer: Unused; kept so the signature matches ``training``.

    Returns:
        Mean loss per batch.
    """
    model_to_validate.eval()
    running_loss = 0.0

    with torch.no_grad():
        for data in val_loader:
            pixel_values = data[0]
            way_p = data[1]
            if isinstance(way_p, (list, tuple)):
                target = torch.stack(list(way_p), dim=1)
            else:
                target = way_p
            target = target.float()

            # Squeezed model output is reshaped to line up with the target.
            outputs = wp_generation(pixel_values, model_to_validate).reshape(target.shape)
            loss = criterion(outputs, target)
            running_loss += loss.item()

    return running_loss / len(val_loader)

In [None]:
def test(model_to_test, dataloader, optimizer, criterion):
    """Print prediction and target for the first batch and return its loss.

    Fixes relative to the previous version, which could not run: it printed
    the undefined names ``pred`` and ``waypoints``, accumulated into the
    undefined ``total_loss``, and carried a duplicate ``break``. The target is
    now built as ``(batch, coords)`` like in ``training``.

    Only the first batch is evaluated (the loop breaks after it), so the loss
    is averaged over the batches actually seen rather than ``len(dataloader)``.

    Args:
        optimizer: Unused; kept so the signature matches ``training``.

    Returns:
        Mean loss over the evaluated batches (0.0 for an empty loader).
    """
    model_to_test.eval()
    running_loss = 0.0
    batches_seen = 0

    with torch.no_grad():
        for data in dataloader:
            pixel_values = data[0]
            way_p = data[1]
            if isinstance(way_p, (list, tuple)):
                target = torch.stack(list(way_p), dim=1)
            else:
                target = way_p
            target = target.float()

            outputs = wp_generation(pixel_values, model_to_test).reshape(target.shape)
            print(outputs)
            print(target)
            loss = criterion(outputs, target)
            running_loss += loss.item()
            batches_seen += 1
            break

    return running_loss / max(batches_seen, 1)

In [None]:
# NOTE(review): `best_model_trained` is not assigned in any cell above this one;
# presumably a training-loop cell defines it. Confirm before Restart & Run All.
model_to_save = best_model_trained

In [None]:
# Persist the whole model object. The name was misspelled `model__to_save`,
# which raised NameError.
torch.save(model_to_save, 'models/VitWPModel.pth')

In [None]:
# Keep a handle to the saved Algorithm 1 model under the name `driver_model`.
driver_model = model_to_save

**ALGORITHM 2: Future Trajectory Prediction By LSTM using Multi Modal Inputs**

Dataset Creation (IMAGE to TEMPORAL DATA)

In [None]:
# DETR image processor and object-detection model, used by `object_detection`.
image_processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50")
detr_model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

# Re-declared configuration for Algorithm 2; note `batch_size` changes from 2 to 8.
root = "datasets"
images_dir = f"{root}/images"
labels_dir = f"{root}/labels"
bev_file = "dataset_bev.json"
batch_size = 8

# Frame stride: sorted folder listings are subsampled with `[::skip]`.
skip = 10

*Saving BEV details*

In [None]:
def prepare_bev_dataset(images_dir, output_dir, num_videos=28):
    """Build a BEV map and waypoint for every ``skip``-th image and append them to disk.

    For each image the depth map, detected boxes, smoothed boxes, BEV map and
    waypoint are computed, and one record is appended to
    ``output_dir/bev_file`` as a line holding a one-element JSON list, the same
    layout ``prepare_t1_t2_bev_dataset`` writes.

    The previous implementation kept one list for the whole run and re-dumped
    all of it after every image, so line *n* repeated the *n - 1* earlier
    records (quadratic file growth), and readers taking element 0 of each line
    always received the very first record.

    The file is opened in append mode: delete it before re-running.

    Args:
        images_dir: Directory containing ``Video_000`` ... sub-folders of PNGs.
        output_dir: Directory in which ``bev_file`` is written.
        num_videos: Number of ``Video_XXX`` folders to process (default 28).
    """
    output_file = os.path.join(output_dir, bev_file)

    for i in range(num_videos):
        curr_folder_images = f"{images_dir}/Video_{i:03d}"
        image_files = sorted(file for file in os.listdir(curr_folder_images) if file.endswith(".png"))
        image_files = image_files[::skip]

        for image_file in image_files:
            image_path = os.path.join(curr_folder_images, image_file)
            # Context manager closes the image file handle after processing.
            with Image.open(image_path) as image:
                depth_map = image_to_depth_map(image)
                objects = object_detection(image)
            trajectories = future_positions(objects['boxes'])
            bev_map = depth_map_to_bev(depth_map, objects['boxes'], trajectories)
            wp_x, wp_y = get_waypoint(bev_map)

            record = {
                "image": image_path,
                "bev_map": bev_map.tolist(),
                "way_point": [wp_x, wp_y]
            }

            # Written per image so an interrupted run keeps what is done so far.
            with open(output_file, 'a') as wf:
                json.dump([record], wf)
                wf.write("\n")

        print(f"\rProgress: {((i + 1) / num_videos * 100):.2f} %", end = "")

    print(f"\nDone")

In [None]:
# NOTE: the helpers this calls (image_to_depth_map, object_detection,
# future_positions, depth_map_to_bev, get_waypoint) are defined in later cells
# of this notebook; those cells must have been run first.
prepare_bev_dataset(images_dir, root)

getting dataset

In [None]:
class BEVDataset(Dataset):
    """Sliding-window sequences of (image path, BEV map, waypoint) records.

    Each line of ``root/bev_file`` is a JSON list of records. The *last*
    record of each line is used: it is the only record of a one-record line
    and the newest record of a cumulative line, so both layouts are read
    correctly. (Taking element 0, as before, returned the first record of the
    file for every cumulative line.)

    Every item is ``[images, bev_maps, waypoints]``, three lists of length
    ``sequence_length`` covering consecutive records of the file.
    NOTE(review): windows run across the whole file, so a sequence can span
    the boundary between two videos. Confirm this is acceptable.
    """

    def __init__(self, root, bev_file, sequence_length = 5):
        self.data = []
        self.sequence_length = sequence_length

        images = []
        bev_maps = []
        waypoints = []
        with open(os.path.join(root, bev_file), "r") as wf:
            for line in wf:
                if not line.strip():
                    continue
                records = json.loads(line)
                if not records:
                    continue
                record = records[-1]

                images.append(record['image'])
                bev_maps.append(torch.tensor(record['bev_map']))
                waypoints.append(torch.tensor(record['way_point']))

                # Emit one window ending at the current record once enough exist.
                if len(images) >= self.sequence_length:
                    self.data.append([
                        images[-self.sequence_length:],
                        bev_maps[-self.sequence_length:],
                        waypoints[-self.sequence_length:],
                    ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

In [None]:
# Split indices for train and validation
dataset = BEVDataset(root, bev_file, sequence_length = 5)
dataloader = DataLoader(dataset, batch_size = batch_size, shuffle = True, pin_memory=True, drop_last=True)

train_indices, val_indices = train_test_split(range(len(dataset)), train_size = 0.5, test_size=0.5, random_state=42)
train_dataset = torch.utils.data.Subset(dataset, train_indices)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, drop_last=True)

val_dataset = torch.utils.data.Subset(dataset, val_indices)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, drop_last=True)

BEV to image model (for faster operation)

In [None]:
class BEVToImage(nn.Module):
    """ResNet-18 encoder with a transposed-convolution decoder producing a 200x200 map.

    The ResNet-18 trunk (all children except the last two) feeds a 1x1
    convolution, four stride-2 transposed convolutions and a final 1x1
    convolution to a single channel, which is resized bilinearly to 200x200.
    """

    def __init__(self):
        super().__init__()
        resnet = models.resnet18(pretrained=True)
        # Keep everything except the last two children of the ResNet.
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])
        self.conv1x1 = nn.Conv2d(512, 256, kernel_size=1)

        decoder_layers = []
        for in_ch, out_ch in [(256, 512), (512, 128), (128, 64), (64, 32)]:
            decoder_layers.append(
                nn.ConvTranspose2d(in_ch, out_ch, kernel_size=3, stride=2, padding=1, output_padding=1)
            )
            decoder_layers.append(nn.ReLU())
        decoder_layers.append(nn.Conv2d(32, 1, kernel_size=1))
        self.upsample = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the 200x200 prediction with all size-1 dimensions squeezed away."""
        features = self.conv1x1(self.backbone(x))
        decoded = self.upsample(features)
        resized = F.interpolate(decoded, size=(200, 200), mode='bilinear', align_corners=False)
        return resized.squeeze()

In [None]:
class ImageDataset(Dataset):
    """(image array, BEV map) pairs loaded from the BEV record file.

    Reads the module-level ``root/bev_file``; the ``image_dir`` argument is
    accepted for interface compatibility but is not used, because every record
    stores its own image path.

    The *last* record of each line is used: it is the only record of a
    one-record line and the newest record of a cumulative line. Taking
    element 0, as before, returned the first record of the file for every
    cumulative line. Image files are now closed after being read.
    """

    def __init__(self, image_dir):
        self.data = []

        with open(os.path.join(root, bev_file), "r") as wf:
            for line in wf:
                if not line.strip():
                    continue
                records = json.loads(line)
                if not records:
                    continue
                record = records[-1]

                with Image.open(record['image']) as img:
                    image = np.array(img)
                bev_map = torch.tensor(record['bev_map'])

                self.data.append([image, bev_map])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

In [None]:
# Instantiate the image-to-BEV network; its constructor requests pretrained ResNet-18 weights.
bev_to_image_model = BEVToImage()

In [None]:
# Load every (image, BEV map) pair and wrap it in a shuffled loader.
imageset = ImageDataset(images_dir)
image_loader = DataLoader(imageset, batch_size = batch_size, shuffle = True, pin_memory=True, drop_last=True)

# The same loader is bound to all three names, so there is no held-out
# validation/test split here. These names also replace the Algorithm 2
# loaders created earlier.
dataloader = image_loader
train_loader = image_loader
val_loader = image_loader

In [None]:
def wp_generation(inputs, model):
    """Scale one channels-last image to [0, 1], make it a batch of one, and run ``model``.

    The previous body read the global ``image`` instead of the ``inputs``
    argument, so it either raised NameError or silently processed an unrelated
    image.

    Args:
        inputs: Single image with layout (height, width, channels) and values
            in 0-255; anything ``np.array`` can convert.
        model: Callable applied to the ``(1, channels, height, width)``
            float32 tensor.

    Returns:
        Whatever ``model`` returns.
    """
    pixels = np.array(inputs, dtype = np.float32) / 255.0
    batch = torch.tensor(pixels, dtype=torch.float32).unsqueeze(0).permute(0, 3, 1, 2)
    return model(batch)

In [None]:
def training(model, image_loader, optimizer, criterion):
    """Run one optimisation pass over ``image_loader`` and return the mean loss.

    Only the first image and first BEV map of every batch are used: the
    prediction for ``batch[0][0]`` is compared with ``batch[1][0]``.
    """
    model.train()
    loss_sum = 0
    for batch in image_loader:
        images, bev_targets = batch[0], batch[1]
        optimizer.zero_grad()
        prediction = wp_generation(images[0], model)
        batch_loss = criterion(prediction, bev_targets[0].float())
        batch_loss.backward()
        optimizer.step()
        loss_sum += batch_loss.item()
    return loss_sum / len(image_loader)

In [None]:
def validation(model, image_loader, optimizer, criterion):
    """Return the mean loss over ``image_loader`` without updating the model.

    As in ``training``, only the first image/BEV pair of each batch is
    evaluated. ``optimizer`` is accepted but not used.
    """
    model.eval()
    loss_sum = 0
    with torch.no_grad():
        for batch in image_loader:
            images, bev_targets = batch[0], batch[1]
            prediction = wp_generation(images[0], model)
            loss_sum += criterion(prediction, bev_targets[0].float()).item()
    return loss_sum / len(image_loader)

In [None]:
def test(model, image_loader, optimizer, criterion):
    """Print prediction and target for the first batch and return its loss.

    Fixes relative to the previous version: it printed the undefined name
    ``waypoints`` (now the BEV target actually compared against), and divided
    the loss of the single evaluated batch by ``len(image_loader)``. The loss
    is now averaged over the batches actually seen.

    Only the first image/BEV pair of the first batch is evaluated.
    ``optimizer`` is accepted but not used.

    Returns:
        Mean loss over the evaluated batches (0.0 for an empty loader).
    """
    model.eval()
    total_loss = 0.0
    batches_seen = 0
    with torch.no_grad():
        for data in image_loader:
            img = data[0]
            bev = data[1]
            target = bev[0].float()
            pred = wp_generation(img[0], model)
            print(pred)
            print(target)
            loss = criterion(pred, target)
            total_loss += loss.item()
            batches_seen += 1
            break
    return total_loss / max(batches_seen, 1)

In [None]:
# Point the generic `model_to_train` name at the image-to-BEV network.
model_to_train = bev_to_image_model

In [None]:
# NOTE(review): `best_model_trained` is not assigned in any cell above this one;
# presumably a training-loop cell defines it. Confirm before Restart & Run All.
model_to_save = best_model_trained

In [None]:
# Persist the whole model object; the `models/` directory must already exist.
torch.save(model_to_save, 'models/BEVToImage.pth')

In [None]:
# Keep a handle to the saved network under the name `image_to_bev_model`.
image_to_bev_model = model_to_save

image > depth map and object detection > bird's eye view

In [None]:
def image_to_depth_map(image):
    """Estimate a depth map for ``image`` with the ``Intel/dpt-large`` pipeline.

    The pipeline is created on the first call and cached on the function
    object. Previously it was rebuilt, and the model reloaded, on every call,
    which dominated the runtime of the dataset-preparation loops.

    Args:
        image: Image passed to the pipeline as ``images=``.

    Returns:
        The ``'predicted_depth'`` entry of the pipeline result.
    """
    estimator = getattr(image_to_depth_map, "_estimator", None)
    if estimator is None:
        estimator = pipeline(task="depth-estimation", model="Intel/dpt-large")
        image_to_depth_map._estimator = estimator
    result = estimator(images=image)
    return result['predicted_depth']

In [None]:
def plot_depth_map(depth_map, cord_x = 400, cord_y = 75, c = 'white'):
    """Show the inverted depth map and mark/print the value at one coordinate.

    The map is inverted as ``max - depth_map`` before plotting. The printed
    value is read at ``[cord_y - 1, cord_x - 1]`` of the inverted map and the
    marker is drawn at ``(cord_x, cord_y)`` in colour ``c``.
    """
    inverted = depth_map.max() - depth_map
    # Unpacking the shape requires a 2-D map, as the original did.
    _rows, _cols = inverted.shape
    value_at_cord = inverted[cord_y - 1, cord_x - 1]
    print(f"Depth at co-ordinate ({cord_x}, {cord_y}) of the image: {value_at_cord:.2f} meters")
    plt.scatter(cord_x, cord_y, color=c, marker='x')
    plt.imshow(inverted, cmap = 'inferno')
    plt.colorbar()
    plt.show()

In [None]:
def object_detection(image):
    """Detect objects in a PIL image with the module-level DETR model.

    The forward pass now runs under ``torch.no_grad()``: this is inference
    only, and tracking gradients kept the autograd graph alive for every
    image.

    Args:
        image: PIL image; ``image.size[::-1]`` is used as the target size for
            rescaling the boxes.

    Returns:
        The first (only) entry of ``post_process_object_detection`` with a
        score threshold of 0.9; callers read its ``'boxes'`` entry.
    """
    inputs = image_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = detr_model(**inputs)
    target_sizes = torch.tensor([image.size[::-1]])
    objects = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[0]

    return objects

In [None]:
def future_positions(objects, alpha = 0.1):
    """Exponentially smooth each box against its own coordinates.

    For every box the running state starts as the box itself; then, for each
    coordinate of the box in turn, the state becomes
    ``alpha * coordinate + (1 - alpha) * state``. The final state of each box
    forms one row of the result.

    Args:
        objects: Iterable of 1-D tensors (one per box).
        alpha: Smoothing factor.

    Returns:
        float32 tensor with one row per input box.
    """
    final_states = []
    for box in objects:
        state = box
        for coordinate in box:
            state = alpha * coordinate + (1 - alpha) * state
        final_states.append(state.detach().numpy())

    return torch.tensor(final_states, dtype = torch.float32)

In [None]:
def depth_map_to_bev(depth_map, objects, trajectories):
    """Project a depth map onto a 200x200 grid and blank out object regions.

    Step 1: every pixel is mapped to a grid cell from its image position and
    depth (fixed intrinsics below); each cell keeps the maximum depth mapped
    to it.
    Step 2: for every (box, trajectory box) pair whose mean depth inside the
    box exceeds 4, the grid regions covered by the box and by the trajectory
    box, padded by 10 cells, are set to 0.

    Changes from the previous implementation:
      * the per-pixel Python loop is replaced by ``np.maximum.at``;
      * slice bounds are clamped at 0, because a negative bound wrapped
        around to the far side of the grid and usually produced an empty
        slice, leaving objects near the left/top edge unmasked;
      * unused locals were removed and the threshold is compared as a plain
        float.

    NOTE(review): step 1 clips cell indices to [0, 199] without the +100
    centre offset used in step 2, so pixels left of / above the principal
    point all land in row/column 0. Confirm whether that is intended.

    Args:
        depth_map: 2-D torch tensor of depths.
        objects: Iterable of 4-element boxes ``(x_min, y_min, x_max, y_max)``.
        trajectories: Iterable of 4-element boxes, paired with ``objects``.

    Returns:
        ``uint8`` NumPy array of shape (200, 200), values clipped to 0-255.
    """
    f_x = 1000
    f_y = 1000
    c_x = 400
    c_y = 300
    bev_size = 200
    bev_center = 100
    margin = 10
    scale_factor = 15

    height, width = depth_map.shape
    bev_map = np.zeros((bev_size, bev_size), dtype=np.float32)

    x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))

    depth = depth_map.detach().numpy()
    X = (x_coords - c_x) * depth / f_x
    Y = (y_coords - c_y) * depth / f_y

    bev_x = np.clip((X * scale_factor).astype(int), 0, bev_size - 1)
    bev_y = np.clip((Y * scale_factor).astype(int), 0, bev_size - 1)

    # Unbuffered scatter-max: each cell ends up with the largest depth mapped to it.
    np.maximum.at(bev_map, (bev_y.ravel(), bev_x.ravel()), depth.ravel().astype(np.float32))

    for obj, traj in zip(objects, trajectories):
        x_min, y_min, x_max, y_max = (float(v) for v in obj)
        traj_box = tuple(float(v) for v in traj)

        obj_depths = depth_map[int(y_min):int(y_max), int(x_min):int(x_max)].detach().numpy()
        mean_depth = float(np.mean(obj_depths)) if obj_depths.size > 0 else 0.0

        # Only objects with a mean depth above 4 are masked out.
        if not mean_depth > 4:
            continue

        for box_x_min, box_y_min, box_x_max, box_y_max in ((x_min, y_min, x_max, y_max), traj_box):
            col_lo = int(bev_center + (box_x_min - c_x) * mean_depth / f_x * scale_factor) - margin
            col_hi = int(bev_center + (box_x_max - c_x) * mean_depth / f_x * scale_factor) + margin
            row_lo = int(bev_center + (box_y_min - c_y) * mean_depth / f_y * scale_factor) - margin
            row_hi = int(bev_center + (box_y_max - c_y) * mean_depth / f_y * scale_factor) + margin
            # Clamp at 0 so negative bounds cannot wrap; bounds past the grid
            # edge are already truncated by slicing.
            bev_map[max(row_lo, 0):max(row_hi, 0), max(col_lo, 0):max(col_hi, 0)] = 0

    bev_map = np.clip(bev_map, 0, 255)
    bev_map = bev_map.astype(np.uint8)

    return bev_map

In [None]:
def visualize_bev(bev_map):
    """Plot the inverted BEV map, min-max normalised to 0-255, with a colour bar."""
    inverted = bev_map.max() - bev_map
    display_map = np.uint8(cv2.normalize(inverted, None, 0, 255, cv2.NORM_MINMAX))
    plt.imshow(display_map, cmap="jet", interpolation='nearest')
    plt.colorbar()
    plt.show()

In [None]:
def get_waypoint(bev_map):
    """Pick a waypoint from a BEV map.

    The map is inverted (``max - bev_map``) and min-max normalised to 0-255.
    Cells on a stride-4 grid over ``[0, 200)`` with a value below 80 are
    candidates. Among the candidates whose column is closest to the centre
    column (100), the largest column wins, and within that column the first
    candidate row in ascending order is taken.

    The selection is unchanged from the previous implementation; its unused
    bookkeeping was removed, and an empty candidate set now raises a
    descriptive ``ValueError`` instead of the opaque "min() arg is an empty
    sequence".

    Args:
        bev_map: 2-D array indexable up to ``[196, 196]``.

    Returns:
        ``(column * 4, row * 3)`` of the selected cell.
        NOTE(review): the 4 and 3 factors presumably rescale grid coordinates
        to image pixels. Confirm against the image size.

    Raises:
        ValueError: If no sampled cell is below the threshold.
    """
    bev_map = bev_map.max() - bev_map
    bev_map = cv2.normalize(bev_map, None, 0, 255, cv2.NORM_MINMAX)
    bev_map = np.uint8(bev_map)
    x_mid = 100

    # Column-major scan order (x outer, y inner) matters for the row tie-break below.
    candidates = [
        (x, y)
        for x in range(0, 200, 4)
        for y in range(0, 200, 4)
        if bev_map[y, x] < 80
    ]
    if not candidates:
        raise ValueError("get_waypoint: no BEV cell below the intensity threshold")

    min_offset = min(abs(x_mid - x) for x, _ in candidates)
    cord_x = max(x for x, _ in candidates if abs(x_mid - x) == min_offset)
    cord_y = next(y for x, y in candidates if x == cord_x)

    return cord_x * 4, cord_y * 3

In [None]:
# End-to-end demo on a single frame: depth estimation and plot.
image = Image.open('datasets/images/Video_000/v000_0000.png')
depth_map = image_to_depth_map(image)
plot_depth_map(depth_map, c = 'red')

# Detect objects and compute their smoothed boxes.
objects = object_detection(image)
trajectories = future_positions(objects['boxes'])

# Build and display the BEV map.
bev_map = depth_map_to_bev(depth_map, objects['boxes'], trajectories)
visualize_bev(bev_map)

# Pick a waypoint from the BEV map and mark it on the depth plot.
wp_x, wp_y = get_waypoint(bev_map)
plot_depth_map(depth_map, wp_x, wp_y, c = 'red')

OUR TEMPORAL FUSION MODEL

In [None]:
class ConvLSTMCell(nn.Module):
    """Single convolutional LSTM cell.

    One convolution over the concatenated input and hidden state produces all
    four gates at once; ``padding = kernel_size // 2`` keeps the spatial size
    unchanged for odd kernel sizes.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size=3):
        super().__init__()
        self.conv = nn.Conv2d(
            input_dim + hidden_dim,
            hidden_dim * 4,
            kernel_size,
            padding=kernel_size // 2,
        )
        self.hidden_dim = hidden_dim

    def forward(self, x, hidden):
        """Advance one step.

        Args:
            x: Input tensor, concatenated with the hidden state along dim 1.
            hidden: ``(h, c)`` tuple of previous hidden and cell states.

        Returns:
            ``(h_next, c_next)``.
        """
        h_prev, c_prev = hidden
        gates = self.conv(torch.cat([x, h_prev], dim=1))
        in_gate, forget_gate, cell_gate, out_gate = torch.chunk(gates, 4, dim=1)

        c_next = torch.sigmoid(forget_gate) * c_prev + torch.sigmoid(in_gate) * torch.tanh(cell_gate)
        h_next = torch.sigmoid(out_gate) * torch.tanh(c_next)
        return h_next, c_next

In [None]:
class ConvLSTM(nn.Module):
    """Stack of ``ConvLSTMCell`` layers followed by a linear read-out.

    The read-out layer ``fc`` maps the flattened final hidden state to
    ``output_dim * sequence_length`` values.

    Its input size depends on the spatial size of the data. The previous
    implementation always created ``fc`` lazily inside the first ``forward``
    call, so an optimizer built from ``model.parameters()`` before that call
    never saw, and never trained, the read-out weights. Pass ``spatial_size``
    to create ``fc`` in the constructor. Without it the lazy behaviour is
    kept for compatibility; in that case run one forward pass *before*
    constructing the optimizer.

    Args:
        input_dim: Channels of each input frame.
        hidden_dim: Channels of every hidden state.
        num_layers: Number of stacked cells.
        output_dim: Values predicted per time step.
        sequence_length: Number of time steps in the prediction.
        spatial_size: Optional ``(height, width)`` of the input frames.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, sequence_length, spatial_size=None):
        super(ConvLSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.output_dim = output_dim
        self.sequence_length = sequence_length

        self.lstm_cells = nn.ModuleList([
            ConvLSTMCell(input_dim if i == 0 else hidden_dim, hidden_dim)
            for i in range(num_layers)
        ])

        if spatial_size is None:
            # Created on the first forward pass, once the frame size is known.
            self.fc = None
        else:
            height, width = spatial_size
            self.fc = nn.Linear(hidden_dim * height * width, output_dim * sequence_length)

    def forward(self, x):
        """Process ``x`` of shape (batch, time, channels, height, width).

        Returns:
            Tensor of shape ``(batch, sequence_length, output_dim)``.
        """
        b, t, c, h, w = x.size()
        h_states = [torch.zeros(b, self.hidden_dim, h, w, device=x.device) for _ in range(self.num_layers)]
        c_states = [torch.zeros(b, self.hidden_dim, h, w, device=x.device) for _ in range(self.num_layers)]

        for time_step in range(t):
            inp = x[:, time_step]
            for i, cell in enumerate(self.lstm_cells):
                h_states[i], c_states[i] = cell(inp, (h_states[i], c_states[i]))
                inp = h_states[i]

        # Only the top layer's hidden state after the last step is read out.
        out = torch.flatten(inp, start_dim=1)

        if self.fc is None:
            self.fc = nn.Linear(self.hidden_dim * h * w, self.output_dim * self.sequence_length).to(x.device)

        # fc always emits output_dim * sequence_length values, so reshape with
        # sequence_length (identical to `t` whenever the two agree).
        return self.fc(out).contiguous().view(b, self.sequence_length, self.output_dim)

In [None]:
# Hyper-parameters: 1 input channel, 64 hidden channels, 2 layers,
# 2 outputs per step, sequences of 5 steps.
input_dim, hidden_dim, num_layers, output_dim, seq_length = 1, 64, 2, 2, 5
model_to_train = ConvLSTM(input_dim, hidden_dim, num_layers, output_dim, seq_length)

*--training set ups--*

In [None]:
def wp_generation(bev, model):
    """Run ``model`` on ``bev`` and return its output.

    The previous body passed the undefined global ``bev_maps`` instead of the
    ``bev`` argument, raising NameError.
    """
    return model(bev)

In [None]:
def training(model, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    ``batch[1]`` (BEV maps) and ``batch[2]`` (waypoints) are stacked along
    dim 1; the BEV maps additionally get a size-1 dimension inserted at
    position 2 before being passed to the model.
    """
    model.train()
    loss_sum = 0
    for batch in train_loader:
        bev_maps = torch.stack(batch[1], dim=1)
        waypoints = torch.stack(batch[2], dim=1).to(torch.float32)
        bev_maps = bev_maps.unsqueeze(2)

        optimizer.zero_grad()
        batch_loss = criterion(wp_generation(bev_maps, model), waypoints)
        batch_loss.backward()
        optimizer.step()
        loss_sum += batch_loss.item()
    return loss_sum / len(train_loader)

In [None]:
def validation(model, val_loader, optimizer, criterion):
    """Return the mean loss over ``val_loader`` without updating the model.

    Batches are prepared exactly as in ``training``. ``optimizer`` is accepted
    but not used.
    """
    model.eval()
    loss_sum = 0
    with torch.no_grad():
        for batch in val_loader:
            bev_maps = torch.stack(batch[1], dim=1)
            waypoints = torch.stack(batch[2], dim=1).to(torch.float32)
            bev_maps = bev_maps.unsqueeze(2)

            prediction = wp_generation(bev_maps, model)
            loss_sum += criterion(prediction, waypoints).item()
    return loss_sum / len(val_loader)

In [None]:
def test(model, dataloader, optimizer, criterion):
    """Print prediction and target for the first batch and return its loss.

    The loop stops after one batch, so the loss is averaged over the batches
    actually evaluated. The previous version divided that single loss by
    ``len(dataloader)`` and under-reported it. ``optimizer`` is accepted but
    not used.

    Returns:
        Mean loss over the evaluated batches (0.0 for an empty loader).
    """
    model.eval()
    total_loss = 0.0
    batches_seen = 0
    with torch.no_grad():
        for data in dataloader:
            bev_maps = torch.stack(data[1], dim=1)
            waypoints = torch.stack(data[2], dim=1).to(torch.float32)
            bev_maps = bev_maps.unsqueeze(2)

            pred = wp_generation(bev_maps, model)
            print(pred)
            print(waypoints)
            loss = criterion(pred, waypoints)
            total_loss += loss.item()
            batches_seen += 1
            break
    return total_loss / max(batches_seen, 1)

In [None]:
# NOTE(review): `best_model_trained` is not assigned in any cell above this one;
# presumably a training-loop cell defines it. Confirm before Restart & Run All.
model_to_save = best_model_trained

In [None]:
# Persist the whole model object; the `models/` directory must already exist.
torch.save(model_to_save, 'models/ConvLSTM.pth')

In [None]:
# Rebind `driver_model` to the Algorithm 2 model, replacing the earlier binding.
driver_model = model_to_save

**ALGORITHM 3: Future Waypoint Prediction using BEV representations and GPS mechanism**

*Saving t1, t2 BEV details*

In [None]:
# DETR image processor and object-detection model (same checkpoints as in Algorithm 2).
image_processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50")
detr_model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

# Re-declared configuration for Algorithm 3; adds the t1/t2 output file name.
root = "datasets"
images_dir = f"{root}/images"
labels_dir = f"{root}/labels"
bev_file = "dataset_bev.json"
bev_t1_t2_file = "dataset_t1_t2_bev.json"
batch_size = 8

# Frame stride for this section: every 20th file (earlier sections used 10).
skip = 20

In [None]:
def prepare_t1_t2_bev_dataset(images_dir, output_dir, num_videos=28):
    """Build the BEV/waypoint dataset and append it to `bev_t1_t2_file`.

    For every `skip`-th PNG of ``{images_dir}/Video_000`` ..
    ``Video_{num_videos-1:03d}`` the frame is turned into a depth map, objects
    are detected, two BEV maps (t1/t2) are built and one waypoint is picked on
    each. One JSON line (a single-element list) is written per frame.

    Args:
        images_dir: directory that contains the ``Video_XXX`` folders.
        output_dir: directory in which `bev_t1_t2_file` is written.
        num_videos: number of video folders to process (default 28, the value
            that used to be hard-coded).

    Note: the output file is opened in append mode, so running this twice
    duplicates every record.
    """
    output_path = os.path.join(output_dir, bev_t1_t2_file)

    # Open the output once instead of re-opening it for every frame.
    with open(output_path, 'a') as wf:
        for i in range(num_videos):
            curr_folder_images = f"{images_dir}/Video_{i:03d}"
            images_path = sorted(
                file for file in os.listdir(curr_folder_images) if file.endswith(".png")
            )[::skip]

            # Navigation target column cycles through 20, 60, 100, 140.
            x_val = 20

            for image_file in images_path:
                if x_val == 180:
                    x_val = 20
                image_path = os.path.join(curr_folder_images, image_file)

                # Context manager closes the underlying file handle of the image.
                with Image.open(image_path) as image:
                    depth_map = image_to_depth_map(image)
                    objects = object_detection(image)

                trajectories_t1, trajectories_t2 = future_positions(objects['boxes'], alpha = 0.3, diff = 0.3)
                bev_map_t1, bev_map_t2 = depth_map_to_bev(depth_map, objects['boxes'], trajectories_t1, trajectories_t2)
                wp_x_t1, wp_y_t1 = get_waypoint(bev_map_t1, x_val)
                wp_x_t2, wp_y_t2 = get_waypoint(bev_map_t2, x_val)

                record = {
                    "target": x_val,
                    "image": image_path,
                    "bev_map": bev_map_t1.tolist(),  # only the t1 BEV is stored
                    "way_point": [wp_x_t1, wp_y_t1, wp_x_t2, wp_y_t2]
                }
                # Readers expect a one-element list per line.
                json.dump([record], wf)
                wf.write("\n")
                wf.flush()  # keep finished records on disk if the run is interrupted

                x_val = x_val + 40

            print(f"\rProgress: {((i + 1) / num_videos * 100):.2f} %", end = "")

    print(f"\nDone")

In [None]:
# Generate the dataset file under `root`. The writer appends, so re-running this
# cell adds duplicate records to an existing file.
prepare_t1_t2_bev_dataset(images_dir, root)

Getting the dataset

In [None]:
class BEV_t1_t2_Dataset(Dataset):
    """In-memory dataset backed by the JSON-lines BEV file.

    Every line of the file is a one-element list holding a dict with the keys
    ``target``, ``image``, ``bev_map`` and ``way_point``. Each item is the list
    ``[target, image, bev_map_tensor, way_point_tensor]``.
    """

    def __init__(self, root, bev_t1_t2_file):
        self.data = []

        records_path = os.path.join(root, bev_t1_t2_file)
        with open(records_path, "r") as handle:
            for line in handle:
                record = json.loads(line)[0]
                self.data.append([
                    record['target'],
                    record['image'],
                    torch.tensor(record['bev_map']),
                    torch.tensor(record['way_point']),
                ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

In [None]:
# Split indices for train and validation
dataset = BEV_t1_t2_Dataset(root, bev_t1_t2_file)
# Loader over the full dataset; drop_last discards a trailing partial batch.
dataloader = DataLoader(dataset, batch_size = batch_size, shuffle = True, pin_memory=True, drop_last=True)

# 50/50 split with a fixed seed so the partition is reproducible.
train_indices, val_indices = train_test_split(range(len(dataset)), train_size = 0.5, test_size=0.5, random_state=42)
train_dataset = torch.utils.data.Subset(dataset, train_indices)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, drop_last=True)

val_dataset = torch.utils.data.Subset(dataset, val_indices)
# Validation order is kept fixed (shuffle=False).
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, drop_last=True)

image and navigation targets > bird's eye view

In [None]:
def image_to_depth_map(image):
    """Estimate a depth map for `image` with the Intel/dpt-large pipeline.

    The pipeline is built on the first call and cached on the function object;
    previously it was re-created (model reloaded) for every single image.

    Returns the ``predicted_depth`` entry of the pipeline result.
    """
    estimator = getattr(image_to_depth_map, "_estimator", None)
    if estimator is None:
        estimator = pipeline(task="depth-estimation", model="Intel/dpt-large")
        image_to_depth_map._estimator = estimator
    result = estimator(images=image)
    return result['predicted_depth']

In [None]:
def object_detection(image):
    """Detect objects in a PIL image with DETR.

    Returns the post-processed detections for the image (the first element of
    ``post_process_object_detection``), keeping only detections with score
    above 0.9 and boxes rescaled to the original image size.
    """
    inputs = image_processor(images=image, return_tensors="pt")
    # Pure inference: do not build an autograd graph for every frame.
    with torch.no_grad():
        outputs = detr_model(**inputs)
    # PIL reports (width, height); the processor expects (height, width).
    target_sizes = torch.tensor([image.size[::-1]])
    objects = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[0]

    return objects

In [None]:
def future_positions(objects, alpha = 0.1, diff = 5):
    """Exponentially smooth each box to obtain two "future" boxes (t1, t2).

    For every row of `objects` the running state starts as the row itself and
    is updated once per coordinate ``row[k]`` (a scalar broadcast against the
    state vector) with gain `alpha` for t1 and ``diff * alpha`` for t2.

    Returns two float32 tensors with one smoothed row per input row.
    """
    gain_t2 = diff * alpha
    rows_t1, rows_t2 = [], []

    for box in objects:
        state_t1 = box
        state_t2 = box
        for k in range(len(box)):
            state_t1 = alpha * box[k] + (1 - alpha) * state_t1
            state_t2 = gain_t2 * box[k] + (1 - gain_t2) * state_t2
        rows_t1.append(state_t1.detach().numpy())
        rows_t2.append(state_t2.detach().numpy())

    return (
        torch.tensor(rows_t1, dtype = torch.float32),
        torch.tensor(rows_t2, dtype = torch.float32),
    )

In [None]:
def _bev_span(lo_px, hi_px, center, focal, mean_depth, scale_factor, origin=100):
    """Project an image-space pixel interval onto a pair of BEV cell indices."""
    lo = int(origin + (lo_px - center) * mean_depth / focal * scale_factor)
    hi = int(origin + (hi_px - center) * mean_depth / focal * scale_factor)
    return lo, hi


def _zero_bev_region(bev_map, y_span, x_span, margin=10):
    """Zero the rectangle `y_span` x `x_span`, grown by `margin`, clamped to the map."""
    rows, cols = bev_map.shape
    # Clamp explicitly: a negative bound would otherwise wrap around to the far
    # side of the map (or produce an empty slice) instead of stopping at 0.
    y0 = min(max(y_span[0] - margin, 0), rows)
    y1 = min(max(y_span[1] + margin, 0), rows)
    x0 = min(max(x_span[0] - margin, 0), cols)
    x1 = min(max(x_span[1] + margin, 0), cols)
    bev_map[y0:y1, x0:x1] = 0


def depth_map_to_bev(depth_map, objects, trajectories_t1, trajectories_t2):
    """Build two 200x200 uint8 BEV maps (t1, t2) from a depth map.

    Every depth pixel is back-projected with a pinhole model (f=1000,
    c=(400, 300)), scaled by 15 and clipped into the grid; each cell keeps the
    largest depth that lands in it. Then, for every detected box whose mean
    depth exceeds 4, the area of its predicted t1 / t2 box (plus a 10-cell
    margin) is zeroed in the corresponding map.

    Args:
        depth_map: 2-D torch tensor of depths.
        objects: iterable of boxes ``(x_min, y_min, x_max, y_max)`` (tensors).
        trajectories_t1, trajectories_t2: predicted boxes, aligned with `objects`.

    Returns:
        ``(bev_map_t1, bev_map_t2)`` as uint8 arrays of shape (200, 200).

    NOTE(review): the dense projection has no +100 origin offset while the box
    projection does — kept as in the original; confirm this is intended.
    """
    f_x = 1000
    f_y = 1000
    c_x = 400
    c_y = 300
    bev_size = 200
    scale_factor = 15

    height, width = depth_map.shape
    x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))

    depth = depth_map.detach().cpu().numpy()
    X = (x_coords - c_x) * depth / f_x
    Y = (y_coords - c_y) * depth / f_y

    bev_x = np.clip((X * scale_factor).astype(int), 0, bev_size - 1)
    bev_y = np.clip((Y * scale_factor).astype(int), 0, bev_size - 1)

    # Vectorised per-cell maximum (replaces a Python loop over every pixel).
    bev_map_t1 = np.zeros((bev_size, bev_size), dtype=np.float32)
    np.maximum.at(
        bev_map_t1,
        (bev_y.ravel(), bev_x.ravel()),
        depth.ravel().astype(np.float32, copy=False),
    )
    bev_map_t2 = bev_map_t1.copy()

    for obj, traj_t1, traj_t2 in zip(objects, trajectories_t1, trajectories_t2):
        x_min, y_min, x_max, y_max = (v.item() for v in obj)

        # Mean depth inside the detection box (bounds clamped at 0 so that a
        # slightly negative box coordinate cannot wrap around).
        obj_depths = depth[
            max(int(y_min), 0):max(int(y_max), 0),
            max(int(x_min), 0):max(int(x_max), 0),
        ]
        mean_depth = np.mean(obj_depths) if obj_depths.size > 0 else 0

        # Only boxes with mean depth above 4 are cleared from the maps.
        if not mean_depth > 4:
            continue

        for bev_map, traj in ((bev_map_t1, traj_t1), (bev_map_t2, traj_t2)):
            tx_min, ty_min, tx_max, ty_max = (v.item() for v in traj)
            x_span = _bev_span(tx_min, tx_max, c_x, f_x, mean_depth, scale_factor)
            y_span = _bev_span(ty_min, ty_max, c_y, f_y, mean_depth, scale_factor)
            _zero_bev_region(bev_map, y_span, x_span)

    bev_map_t1 = np.clip(bev_map_t1, 0, 255).astype(np.uint8)
    bev_map_t2 = np.clip(bev_map_t2, 0, 255).astype(np.uint8)

    return bev_map_t1, bev_map_t2

In [None]:
def get_waypoint(bev_map, x_mid, threshold=80, stride=4):
    """Pick a waypoint on a BEV map, as close as possible to column `x_mid`.

    The map is inverted (``max - value``), min-max normalised to 0..255 and
    sampled on a grid with step `stride`. Sampled cells whose normalised value
    is below `threshold` are candidates. Among the candidates with the smallest
    ``|x_mid - x|`` the one with the largest x wins; for that column the first
    (smallest) y is used.

    Args:
        bev_map: 2-D uint8 BEV map.
        x_mid: desired BEV column.
        threshold: upper bound (exclusive) on the normalised value (default 80).
        stride: sampling step in cells (default 4).

    Returns:
        ``(x * 4, y * 3)``. NOTE(review): the factors 4 and 3 presumably map
        the 200-cell grid to an 800x600 image — confirm.

    Raises:
        ValueError: if no sampled cell is below `threshold`.
    """
    inverted = bev_map.max() - bev_map
    inverted = np.uint8(cv2.normalize(inverted, None, 0, 255, cv2.NORM_MINMAX))
    rows, cols = inverted.shape[:2]

    # (y, x) pairs in column-major scan order, matching the original traversal.
    candidates = [
        (y, x)
        for x in range(0, cols, stride)
        for y in range(0, rows, stride)
        if inverted[y, x] < threshold
    ]
    if not candidates:
        raise ValueError(
            f"get_waypoint: no BEV cell with normalised value below {threshold}"
        )

    best_offset = min(abs(x_mid - x) for _, x in candidates)
    # Tie between a column left and right of x_mid: the larger x wins.
    cord_x = max(x for _, x in candidates if abs(x_mid - x) == best_offset)
    # First candidate of that column, i.e. the smallest y.
    cord_y = next(y for y, x in candidates if x == cord_x)

    return cord_x * 4, cord_y * 3

In [None]:
def target_relative_to_ego(ego_transform, point_world):
    """Express a world point in the ego frame (2-D, rotated by ``-yaw``).

    Returns the rotated offset with its components swapped: ``(local[1], local[0])``.
    """
    origin = ego_transform.location
    # Rotating by the negative yaw brings world offsets into the ego frame.
    angle = -np.radians(ego_transform.rotation.yaw)
    cos_a, sin_a = np.cos(angle), np.sin(angle)

    rotation = np.array([
        [cos_a, -sin_a],
        [sin_a,  cos_a]
    ])
    offset = np.array([point_world.x - origin.x, point_world.y - origin.y])

    rotated = rotation @ offset
    return rotated[1], rotated[0]

In [None]:
def ego_rotation(local_x, local_y):
    """Return ``atan(local_x / local_y)`` in degrees.

    When `local_y` is zero the limit value is returned (+90 or -90 depending
    on the sign of `local_x`, 0 when both are zero) instead of raising
    ZeroDivisionError.
    """
    if local_y == 0:
        if local_x == 0:
            return 0.0
        return math.copysign(90.0, local_x)
    rel = math.degrees(math.atan(local_x / local_y))
    return rel

In [None]:
def target_points_from_gps(target):
    """Convert a world-space target into ego-relative distance, bearing and plot coordinates.

    Reads the global `ego`. The bearing is clamped to [-50, 50] and the second
    local coordinate to [0, 37.5] before they are mapped to plot coordinates.

    Returns ``(dist_from_ego, rel, x_to_plot, y_to_plot)``.
    """
    pose = ego.get_transform()
    goal_world = carla.Location(*target)

    local_x, local_y = target_relative_to_ego(pose, goal_world)
    # Distance uses the unclamped local coordinates.
    dist_from_ego = ((local_x ** 2) + (local_y ** 2)) ** (1/2)

    bearing = ego_rotation(local_x, local_y)
    bearing = min(bearing, 50)
    bearing = max(bearing, -50)

    forward = min(local_y, 37.5)
    forward = max(forward, 0)

    return dist_from_ego, bearing, 400 + (8 * bearing), 600 - (25 * forward)

In [None]:
def bev_on_graph(local_x, local_y):
    """Scatter-plot the ego at the origin (red, 'e') and the target (blue, 't').

    The axes are fixed to x in [-800, 800] and y in [-600, 600].
    """
    plt.figure(figsize=(8, 6))
    plt.scatter(0, 0, color='red', label='e', s=100)
    plt.scatter(local_x, local_y, color='blue', label='t')
    plt.xlim(-800, 800)
    plt.ylim(-600, 600)
    plt.xlabel("X (meters)")
    plt.ylabel("Y (meters)")
    # Draw both coordinate axes through the origin.
    plt.axhline(0, color='black',linewidth=1)
    plt.axvline(0, color='black',linewidth=1)
    plt.grid(True)
    plt.legend()

    plt.show()

In [None]:
def visualize_bev(bev_map):
    """Show a BEV map as a small heat map.

    The map is inverted (``max - value``) and min-max normalised to 0..255
    before plotting, the same preprocessing get_waypoint() applies.
    """
    bev_map = bev_map.max() - bev_map
    bev_map = cv2.normalize(bev_map, None, 0, 255, cv2.NORM_MINMAX)
    bev_map = np.uint8(bev_map)
    plt.figure(figsize = (3,3))
    plt.imshow(bev_map, cmap="jet", interpolation='nearest')
    plt.colorbar()
    plt.show()

In [None]:
def plot_depth_map(depth_map, cord_x = 400, cord_y = 75, c = 'white'):
    """Plot the inverted depth map and mark the point (cord_x, cord_y) with an 'x'.

    Also prints the inverted value at ``[cord_y-1, cord_x-1]``.
    NOTE(review): the printed value is ``max - depth`` and is labelled
    "meters" — confirm the unit.
    """
    depth_map = depth_map.max() - depth_map
    # height/width are not used below.
    height, width = depth_map.shape
    depth_at_cord = depth_map[cord_y-1, cord_x-1]
    print(f"Depth at co-ordinate ({cord_x}, {cord_y}) of the image: {depth_at_cord:.2f} meters")
    plt.scatter(cord_x, cord_y, color=c, marker='x')
    plt.imshow(depth_map, cmap = 'inferno')
    plt.colorbar()
    plt.show()

In [None]:
# End-to-end demo on a single frame: depth -> detections -> predicted boxes -> BEV maps.
image = Image.open('datasets/images/Video_000/v000_0000.png')
depth_map = image_to_depth_map(image)
objects = object_detection(image)
trajectories_t1, trajectories_t2 = future_positions(objects['boxes'], alpha = 0.3, diff = 0.3)
bev_map_t1, bev_map_t2 = depth_map_to_bev(depth_map, objects['boxes'], trajectories_t1, trajectories_t2)

target = (158.0238037109375, 46.202396392822266, 0.0)
# NOTE(review): get_waypoint compares its second argument with BEV column
# indices; here the first element of the target tuple is passed — confirm.
wp_x_t1, wp_y_t1 = get_waypoint(bev_map_t1, target[0])
wp_x_t2, wp_y_t2 = get_waypoint(bev_map_t2, target[0])

In [None]:
# Show each BEV map next to the depth map with its selected waypoint marked in red.
visualize_bev(bev_map_t1)
plot_depth_map(depth_map, wp_x_t1, wp_y_t1, c = 'red')
visualize_bev(bev_map_t2)
plot_depth_map(depth_map, wp_x_t2, wp_y_t2, c = 'red')

In [None]:
# Ego-relative view of the same target.
# Requires the global `ego` (created by initiating_carla) to exist in the kernel.
target = (158.0238037109375, 46.202396392822266, 0.0)
dist, rel, x_target, y_target = target_points_from_gps(target)
print(dist, rel, x_target, y_target)
bev_on_graph(x_target, y_target)

OUR CNN MODEL

In [None]:
class CNN_WPModel(nn.Module):
    """ResNet-18 feature extractor followed by a small MLP that predicts 4 values.

    The 512-d pooled ResNet feature is concatenated with one scalar per sample
    (`target_loc`) before the fully connected layers.
    """

    def __init__(self):
        super(CNN_WPModel, self).__init__()

        # ResNet-18 without its final classification layer.
        resnet = models.resnet18(pretrained=True)
        self.resnet_features = nn.Sequential(*list(resnet.children())[:-1])

        self.fc_layers = nn.Sequential(
            nn.Linear(512 + 1, 512),
            nn.ReLU(),
            nn.Linear(512, 4),
            # NOTE(review): dropout is applied to the 4 outputs themselves, so
            # predictions are randomly zeroed in train mode — confirm intended.
            nn.Dropout(0.25),
        )

    def forward(self, pixel_values, target_loc):
        """Predict 4 values per sample.

        Args:
            pixel_values: image batch of shape (B, 3, H, W).
            target_loc: tensor with one scalar per sample (B elements in any
                shape; a 0-d tensor for B == 1).

        Returns:
            The MLP output with singleton dimensions squeezed out.
        """
        x = self.resnet_features(pixel_values)
        x = x.view(x.size(0), -1)
        # One target scalar per sample; the former view(1, 1) only worked for B == 1.
        target_loc = target_loc.reshape(x.size(0), 1).to(dtype=x.dtype, device=x.device)
        x = torch.cat((x, target_loc), dim=1)
        output = self.fc_layers(x)

        return output.squeeze()

In [None]:
# Fresh model instance; the constructor requests pretrained ResNet-18 weights.
model_to_train = CNN_WPModel()

*--training set ups--*

In [None]:
def wp_generation(values, locs, model):
    """Run `model` on the image tensor `values` and the target scalar `locs`."""
    prediction = model(values, locs)
    return prediction

In [None]:
def training(model, train_loader, optimizer, criterion):
    """Run one optimisation pass over `train_loader` and return the mean loss.

    Every element is indexed as ``sample[0]`` (target scalar), ``sample[2]``
    (2-D BEV map) and ``sample[3]`` (waypoints). The BEV map is transposed,
    given a leading axis and repeated to 3 channels before it is fed to the
    model.
    """
    model.train()
    running_loss = 0
    for sample in train_loader:
        goal = torch.tensor(sample[0], dtype = torch.float)
        # (H, W) -> (1, W, H) -> (1, 3, W, H)
        bev_input = sample[2].float().unsqueeze(0).permute(0, 2, 1).repeat(1, 3, 1, 1)
        expected = sample[3].float()

        optimizer.zero_grad()
        step_loss = criterion(wp_generation(bev_input, goal, model), expected)
        step_loss.backward()
        optimizer.step()
        running_loss += step_loss.item()
    return running_loss / len(train_loader)

In [None]:
def validation(model, val_loader, optimizer, criterion):
    """Return the mean loss of `model` over `val_loader` without updating it.

    Elements are preprocessed like in training (target scalar at index 0, BEV
    map at index 2, waypoints at index 3). `optimizer` is not used.
    """
    model.eval()
    losses = []
    with torch.no_grad():
        for sample in val_loader:
            goal = torch.tensor(sample[0], dtype = torch.float)
            # (H, W) -> (1, W, H) -> (1, 3, W, H)
            bev_input = sample[2].float().unsqueeze(0).permute(0, 2, 1).repeat(1, 3, 1, 1)
            expected = sample[3].float()

            prediction = wp_generation(bev_input, goal, model)
            losses.append(criterion(prediction, expected).item())
    return sum(losses) / len(val_loader)

In [None]:
def test(model, dataloader, optimizer, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for data in dataloader:
            target_loc = torch.tensor(data[0], dtype = torch.float)
            bev_maps = data[2].float()
            waypoints = data[3].float()
            bev_maps = bev_maps.unsqueeze(0).permute(0, 2, 1)
            bev_maps = bev_maps.repeat(1, 3, 1, 1)
            
            pred = wp_generation(bev_maps, target_loc, model)
            print(pred)
            print(waypoints)
            loss = criterion(pred, waypoints)
            total_loss += loss.item()
            break
    return total_loss / len(dataloader)

In [None]:
# Alias the trained model for saving. `best_model_trained` is assigned by the
# training-loop cell, which appears further down in this notebook — run it first.
model_to_save = best_model_trained

In [None]:
# Serialise the whole object (not only a state_dict) to disk.
# NOTE(review): assumes the 'models/' directory already exists — confirm.
torch.save(model_to_save, 'models/CNN_WPModel.pth')

In [None]:
# Later cells refer to the model that drives the vehicle as `driver_model`.
driver_model = model_to_save

**Model Management Section**

In [None]:
# Default optimiser and loss; the training-loop cell builds its own Adam
# optimiser for every learning-rate stage.
optimizer = optim.Adam(model_to_train.parameters(), lr=1e-2)
criterion = nn.MSELoss()

test section

In [None]:
# Smoke test on the raw dataset (iterated element by element, not through a DataLoader).
test_loss = test(model_to_train, dataset, optimizer, criterion)
print(f"test loss is: {test_loss}")

training loop

In [None]:
# Number of learning-rate stages; each stage divides the rate by 10.
iterations = 10
# Maximum epochs per stage (a stage stops early when patience runs out).
num_epochs = 20
# Starting value; the first stage therefore trains with lr / 10.
lr = 1e0

In [None]:
# Staged training: every stage lowers the learning rate by a factor of 10 and
# trains until `num_epochs` is reached or 5 consecutive epochs fail to improve
# both losses by at least 5 %.
#
# The best weights are kept as a *copy* of the state_dict. Previously
# `best_model_trained = model_to_train` only created a second name for the same
# object, so "restoring" the best model never changed anything.
for i in range(0,iterations):
  # Derived from the configured `lr` so that re-running this cell restarts the
  # schedule instead of continuing to shrink the global value.
  stage_lr = lr / (10 ** (i + 1))
  optimizer = optim.Adam(model_to_train.parameters(), lr=stage_lr)
  best_train_loss = float('inf')
  best_val_loss = float('inf')
  patience = 5
  best_state = {name: value.detach().clone() for name, value in model_to_train.state_dict().items()}
  print(f"Setting Learning rate to: {stage_lr}")

  for epoch in range(num_epochs):
    if(patience == 0):
      break

    train_loss = training(model_to_train, train_dataset, optimizer, criterion)
    val_loss = validation(model_to_train, val_dataset, optimizer, criterion)

    if(train_loss > (best_train_loss * 0.95) or (val_loss > (best_val_loss * 0.95))):
      # No sufficient improvement: roll back to the best weights seen so far.
      model_to_train.load_state_dict(best_state)
      patience -= 1
    else:
      best_train_loss = train_loss
      best_val_loss = val_loss
      best_state = {name: value.detach().clone() for name, value in model_to_train.state_dict().items()}
      patience = 5
    print(f"\rEpoch {epoch + 1}/{num_epochs}, train-Loss: {train_loss:.1f}, val-Loss: {val_loss:.1f}     ", end = "")

  # Leave the stage with the best weights loaded; downstream cells read
  # `best_model_trained`.
  model_to_train.load_state_dict(best_state)
  best_model_trained = model_to_train
  print("\n")

---------- REDUNDANT CODE ----------

In [None]:
class VitWPModel(nn.Module):
    """Five conv/batch-norm stages followed by two linear layers (2 outputs).

    Each stage halves the spatial size; ``fc1`` expects a 64 x 7 x 7 feature map.
    """

    # Channel widths: input, then the output of conv1..conv5.
    _CHANNELS = (3, 32, 512, 128, 512, 64)

    def __init__(self):
        super().__init__()
        # Registers conv1, bn1, conv2, bn2, ... in that order.
        for stage in range(1, len(self._CHANNELS)):
            c_in, c_out = self._CHANNELS[stage - 1], self._CHANNELS[stage]
            setattr(self, f"conv{stage}", nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1))
            setattr(self, f"bn{stage}", nn.BatchNorm2d(c_out))

        self.fc1 = nn.Linear(64 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, 2)

        self.dropout = nn.Dropout(0.25)

    def forward(self, pixel_values):
        """Return the squeezed 2-value prediction for `pixel_values`."""
        features = pixel_values
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
            (self.conv5, self.bn5),
        )
        # conv -> ReLU -> batch norm -> 2x2 max pool
        for conv, norm in stages:
            features = torch.max_pool2d(norm(torch.relu(conv(features))), 2)

        flat = features.view(features.size(0), -1)
        hidden = self.dropout(torch.relu(self.fc1(flat)))

        return self.fc2(hidden).squeeze()

In [None]:
class BEVToImage(nn.Module):
    """ResNet-18 encoder with a transposed-convolution decoder producing a 200x200 map."""

    def __init__(self):
        super().__init__()
        # ResNet-18 without its last two children.
        resnet = models.resnet18(pretrained=True)
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])
        self.conv1x1 = nn.Conv2d(512, 256, kernel_size=1)

        # Four stride-2 upsampling stages followed by a 1x1 projection to one channel.
        decoder = []
        for c_in, c_out in ((256, 512), (512, 128), (128, 64), (64, 32)):
            decoder.append(nn.ConvTranspose2d(c_in, c_out, kernel_size=3, stride=2, padding=1, output_padding=1))
            decoder.append(nn.ReLU())
        decoder.append(nn.Conv2d(32, 1, kernel_size=1))
        self.upsample = nn.Sequential(*decoder)

    def forward(self, x):
        """Encode, decode and bilinearly resize to 200x200; singleton dims are squeezed."""
        decoded = self.upsample(self.conv1x1(self.backbone(x)))
        resized = F.interpolate(decoded, size=(200, 200), mode='bilinear', align_corners=False)
        return resized.squeeze()

In [None]:
class ConvLSTMCell(nn.Module):
    """Single convolutional LSTM cell; one convolution produces all four gates."""

    def __init__(self, input_dim, hidden_dim, kernel_size=3):
        super().__init__()
        # "Same" padding keeps the spatial size unchanged.
        self.conv = nn.Conv2d(
            input_dim + hidden_dim,
            hidden_dim * 4,
            kernel_size,
            padding=kernel_size // 2,
        )
        self.hidden_dim = hidden_dim

    def forward(self, x, hidden):
        """Advance one step; `hidden` is ``(h, c)``. Returns ``(h_next, c_next)``."""
        h_prev, c_prev = hidden
        gates = self.conv(torch.cat([x, h_prev], dim=1))
        in_gate, forget_gate, candidate, out_gate = torch.chunk(gates, 4, dim=1)

        in_gate = torch.sigmoid(in_gate)
        forget_gate = torch.sigmoid(forget_gate)
        candidate = torch.tanh(candidate)
        out_gate = torch.sigmoid(out_gate)

        c_next = forget_gate * c_prev + in_gate * candidate
        h_next = out_gate * torch.tanh(c_next)
        return h_next, c_next

In [None]:
class ConvLSTM(nn.Module):
    """Stack of ConvLSTM cells followed by a lazily created linear head.

    Args:
        input_dim: channels of each input frame.
        hidden_dim: channels of every hidden state.
        num_layers: number of stacked cells.
        output_dim: size of each predicted waypoint.
        sequence_length: number of waypoints produced by the linear head
            before they are averaged.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, sequence_length):
        super(ConvLSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.output_dim = output_dim
        self.sequence_length = sequence_length

        self.lstm_cells = nn.ModuleList([
            ConvLSTMCell(input_dim if i == 0 else hidden_dim, hidden_dim)
            for i in range(num_layers)
        ])
        # Created on the first forward pass, once the spatial size is known.
        # An optimizer built before that first pass will not contain these
        # parameters.
        self.fc = None

    def forward(self, x):
        """Run the sequence `x` of shape (b, t, c, h, w) through the stack.

        The head output is reshaped to (b, sequence_length, output_dim),
        averaged over the sequence axis and viewed as (b, t, output_dim); that
        final view only succeeds when ``t == 1``.
        """
        b, t, c, h, w = x.size()
        h_states = [torch.zeros(b, self.hidden_dim, h, w, device=x.device) for _ in range(self.num_layers)]
        c_states = [torch.zeros(b, self.hidden_dim, h, w, device=x.device) for _ in range(self.num_layers)]

        for time_step in range(t):
            inp = x[:, time_step]
            for i, cell in enumerate(self.lstm_cells):
                h_states[i], c_states[i] = cell(inp, (h_states[i], c_states[i]))
                inp = h_states[i]

        out = torch.flatten(inp, start_dim=1)

        if self.fc is None:
            self.fc = nn.Linear(self.hidden_dim * h * w, self.output_dim * self.sequence_length).to(x.device)

        # Use the configured sizes instead of the former hard-coded (5, 2).
        waypoints = self.fc(out).view(b, self.sequence_length, self.output_dim)
        return waypoints.mean(dim=1).view(b, t, self.output_dim)

In [None]:
# Each model is first constructed and then immediately replaced by the object
# returned from torch.load, so the constructed instance is discarded.
# NOTE(review): torch.load unpickles the file contents — only load files from a
# trusted source.
image_to_bev_model = BEVToImage()
image_to_bev_model = torch.load('models/BEVToImage.pth', map_location=torch.device('cpu'))

input_dim, hidden_dim, num_layers, output_dim, seq_length = 1, 64, 2, 2, 5
driver_model = ConvLSTM(input_dim, hidden_dim, num_layers, output_dim, seq_length)
driver_model = torch.load('models/ConvLSTM.pth', map_location=torch.device('cpu'))

In [None]:
class CNN_WPModel(nn.Module):
    """ResNet-18 feature extractor followed by a small MLP that predicts 4 values.

    The 512-d pooled ResNet feature is concatenated with one scalar per sample
    (`target_loc`) before the fully connected layers.
    """

    def __init__(self):
        super(CNN_WPModel, self).__init__()

        # ResNet-18 without its final classification layer.
        resnet = models.resnet18(pretrained=True)
        self.resnet_features = nn.Sequential(*list(resnet.children())[:-1])

        self.fc_layers = nn.Sequential(
            nn.Linear(512 + 1, 512),
            nn.ReLU(),
            nn.Linear(512, 4),
            # NOTE(review): dropout is applied to the 4 outputs themselves, so
            # predictions are randomly zeroed in train mode — confirm intended.
            nn.Dropout(0.25),
        )

    def forward(self, pixel_values, target_loc):
        """Predict 4 values per sample.

        Args:
            pixel_values: image batch of shape (B, 3, H, W).
            target_loc: tensor with one scalar per sample (B elements in any
                shape; a 0-d tensor for B == 1).

        Returns:
            The MLP output with singleton dimensions squeezed out.
        """
        x = self.resnet_features(pixel_values)
        x = x.view(x.size(0), -1)
        # One target scalar per sample; the former view(1, 1) only worked for B == 1.
        target_loc = target_loc.reshape(x.size(0), 1).to(dtype=x.dtype, device=x.device)
        x = torch.cat((x, target_loc), dim=1)
        output = self.fc_layers(x)

        return output.squeeze()

**CarLA simulation**

In [None]:
# Prefer the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# CLIP preprocessing, used by image_to_emb().
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
# Text prompts passed to the CLIP processor together with every image.
semantic_label_classes = ["Traffic Sign", "Building", "Fence", "Other", "Pedestrian", "Pole", "Road Line", "Road", "Sidewalk", "Vegetation", "Car", "Wall", "Unlabeled"]

In [None]:
# Connect to a CARLA server on localhost:2000 (it must already be running).
client = carla.Client('localhost', 2000)
world = client.get_world()
blueprint_library = world.get_blueprint_library()

In [None]:
def remove_all_actors():
    """Destroy every actor of the global `world` whose type id contains 'vehicle', then tick."""
    for candidate in world.get_actors():
        if 'vehicle' not in candidate.type_id:
            continue
        candidate.destroy()
    world.tick()
    print("previous ones destroyed")

In [None]:
def initiating_carla(world, blueprint_library):
    """Spawn the ego vehicle and an RGB camera attached to it.

    Sets the globals `ego` and `camera`. The vehicle uses the first blueprint
    matching '*model3' and the first spawn point of the map; the camera is
    800x600 with a 110 degree field of view.
    """
    global ego, camera

    vehicle_bp = blueprint_library.filter('*model3')[0]
    first_spawn = world.get_map().get_spawn_points()[0]
    ego = world.spawn_actor(vehicle_bp, first_spawn)

    rgb_bp = blueprint_library.find('sensor.camera.rgb')
    for attribute, value in (('image_size_x', '800'), ('image_size_y', '600'), ('fov', '110')):
        rgb_bp.set_attribute(attribute, value)

    # Camera pose relative to the vehicle it is attached to.
    mount = carla.Transform(carla.Location(x = 1.5, z = 2.4))
    camera = world.spawn_actor(rgb_bp, mount, attach_to=ego)

In [None]:
# Start from a clean world: destroy existing vehicles, then spawn ego and camera.
remove_all_actors()
initiating_carla(world, blueprint_library)

In [None]:
def EGO_relative_view(EGO):
    """Return the location 10 units above `EGO` along z."""
    anchor = EGO.get_transform().location
    return anchor + carla.Location(x=0, y=0, z=10)

In [None]:
def set_cam_to_EGO(EGO, angle = 65):
    """Place the spectator above `EGO`, pitched down by `angle` degrees and aligned with its yaw."""
    spectator = world.get_spectator()
    ego_pose = EGO.get_transform()

    view_location = EGO_relative_view(EGO)
    view_rotation = carla.Rotation(pitch = -angle, yaw = ego_pose.rotation.yaw, roll = 0)
    spectator.set_transform(carla.Transform(view_location, view_rotation))

In [None]:
def camera_to_image(image):
    """Camera callback: store the latest frame as an RGB PIL image in `image_captured`.

    CARLA delivers `raw_data` as BGRA bytes. The previous version kept the
    first three channels unchanged, which produced a BGR image (red and blue
    swapped) for models that expect RGB input.
    """
    global image_captured
    bgra = np.frombuffer(image.raw_data, dtype=np.uint8).reshape((image.height, image.width, 4))
    # Channels 2, 1, 0 of BGRA are R, G, B; alpha is dropped.
    rgb = np.ascontiguousarray(bgra[:, :, 2::-1])
    image_captured = Image.fromarray(rgb)

In [None]:
# Move the spectator camera above the ego vehicle (default 65 degree downward pitch).
set_cam_to_EGO(ego)

*functions for model output to control*

In [None]:
def steering(x, intended_x, s_power):
    """Proportional steering command, clamped to [-1, 1].

    The horizontal error ``intended_x - x`` is scaled by 0.0002 and by
    ``(1 + s_power) / 1.5``.
    """
    raw = (intended_x - x) * 0.0002
    raw = raw * (1 + s_power) / 1.5
    capped = 1 if 1 < raw else raw
    return capped if capped > -1 else -1

In [None]:
def TandB(y_mid, intended_y_mid, curr_y = 25):
    """Return ``(throttle, brake)`` for a waypoint at height `intended_y_mid`.

    Full brake when the waypoint is at or below `curr_y`. Otherwise the
    throttle is ``(y_mid - intended) / (2 * y_mid)`` with `intended` capped at
    `y_mid`, limited to the range [0.3, 1], and the brake is 0.
    """
    if intended_y_mid <= curr_y:
        return 0, 1

    if not intended_y_mid < y_mid:
        intended_y_mid = y_mid
    ratio = (y_mid - intended_y_mid) / (2 * y_mid)
    throttle = max(0.3, min(ratio, 1))
    return throttle, 0

In [None]:
def control_ego(steer, throttle, brake):
    """Build a ``carla.VehicleControl`` carrying the given steer, throttle and brake."""
    command = carla.VehicleControl()
    for field, value in (("steer", steer), ("throttle", throttle), ("brake", brake)):
        setattr(command, field, value)

    return command

calculation of metrics

*--route completion--*

In [None]:
def evaluate_RC(intended_x_mid, ego_x_position, intended_y_mid, ego_y_position):
    """Increment the global counter `rcc` when the waypoint is ahead and within 900 units.

    "Ahead" means ``intended_y_mid > ego_y_position``; the distance is Euclidean.
    """
    global rcc
    if not intended_y_mid > ego_y_position:
        return

    dx = intended_x_mid - ego_x_position
    dy = intended_y_mid - ego_y_position
    if math.sqrt((dx ** 2) + (dy ** 2)) <= 900:
        rcc = rcc + 1

*--infraction score--*

In [None]:
class CollisionSensor(object):
    """Attach a CARLA collision sensor to a vehicle and record every collision event."""

    def __init__(self, vehicle):
        self.vehicle = vehicle
        self.world = vehicle.get_world()
        self.collision_history = []

        collision_bp = self.world.get_blueprint_library().find('sensor.other.collision')
        # Default transform: the sensor sits at the vehicle's origin.
        self.sensor = self.world.spawn_actor(collision_bp, carla.Transform(), attach_to=self.vehicle)
        self.sensor.listen(self._on_collision)

    def _on_collision(self, event):
        """Sensor callback: remember the event."""
        self.collision_history += [event]

    def get_collision_count(self):
        """Number of collision events recorded so far."""
        return len(self.collision_history)

In [None]:
def detect_red_light_violation():
    """Flag whether the ego vehicle is within 20 units of a red traffic light.

    The result is stored in the global `red_light_violations` and also
    returned. Previously the function assigned a local variable with a
    different name (`red_light_violation`), so it had no effect at all.
    """
    global red_light_violations
    ego_position = ego.get_location()

    violation = False
    for traffic_light in world.get_traffic_lights():
        if ego_position.distance(traffic_light.get_location()) >= 20.0:
            continue
        if traffic_light.get_state() == carla.TrafficLightState.Red:
            violation = True
            break

    red_light_violations = violation
    return violation

In [None]:
def evaluate_IS(speed_limit, red_light_violations, collision_sensor):
    """Apply infraction penalties to the global score `isc` (never below 0).

    Penalties: 1 for exceeding `speed_limit` (speed is the velocity magnitude
    times 3.6), 1 when `red_light_violations` is truthy, 10 when the collision
    sensor has recorded at least one collision.
    """
    global isc

    v = ego.get_velocity()
    speed = sum(component ** 2 for component in (v.x, v.y, v.z)) ** 0.5 * 3.6

    speeding = speed > speed_limit
    if speeding:
        isc -= 1

    if red_light_violations:
        isc -= 1

    collided = collision_sensor.get_collision_count() > 0
    if collided:
        isc -= 10

    # The score is floored at zero.
    if isc < 0:
        isc = 0

*--driving score--*

In [None]:
def evaluate_DS():
    """Store the mean of the globals `RC` and `IS` in the global `dsc`."""
    global RC, IS, dsc
    dsc = (RC + IS) / 2

controlling based on vit_wp_model

In [None]:
# Build the network, then load its weights from a state_dict checkpoint (on CPU).
driver_model = VitWPModel()
driver_model.load_state_dict(torch.load('models/VitWPModel.pth', map_location=torch.device('cpu')))

In [None]:
def image_to_emb(image):
    """Preprocess `image` with the global CLIP processor and return its ``pixel_values``."""
    encoded = processor(
        text=semantic_label_classes,
        images=image,
        return_tensors="pt",
        padding=True,
    )
    return encoded['pixel_values']

In [None]:
def model_output_generation(image, vit_wp_model):
    """Preprocess `image` and return the output of `vit_wp_model` for it."""
    # squeeze(1) drops dim 1 only when it has size 1.
    return vit_wp_model(image_to_emb(image).squeeze(1))

In [None]:
def normalize_wp(way_p_1, top_w,top_h):
    """Min-max rescale waypoint coordinates in place and return the same object.

    The range of coordinate 0 is measured over the entries of ``way_p_1[0]``
    and the range of coordinate 1 over the entries of ``way_p_1[1]``. Every
    entry ``i`` of ``way_p_1`` then has ``i[0]`` scaled to ``[0, top_h]`` and
    ``i[1]`` scaled to ``[0, top_w]``.

    NOTE(review): the range scan indexes ``way_p_1[0]`` / ``way_p_1[1]`` while
    the rescale loop indexes ``way_p_1`` itself — confirm the expected nesting
    of the input. A zero range (max == min) divides by zero.
    """
    # Alias, not a copy: the caller's object is modified.
    way_p = way_p_1

    # Range of coordinate 0 over the entries of way_p[0].
    mini_f = float('inf')
    maxi_f = -float('inf')
    for i in way_p[0]:
        if(maxi_f < i[0]):
            maxi_f = i[0]
        if(mini_f > i[0]):
            mini_f = i[0]
        
    # Range of coordinate 1 over the entries of way_p[1].
    mini_s = float('inf')
    maxi_s = -float('inf')
    for i in way_p[1]:
        if(maxi_s < i[1]):
            maxi_s = i[1]
        if(mini_s > i[1]):
            mini_s = i[1]
        
    # Normalise to [0, 1], then scale: index 0 by top_h, index 1 by top_w.
    for i in way_p:
        i[0] = (i[0] - mini_f) / (maxi_f - mini_f)
        i[1] = (i[1] - mini_s) / (maxi_s - mini_s)
        i[0] *= top_h
        i[1] *= top_w

    return way_p

In [None]:
def points():
    """Return the fixed reference point ``(400, 120)``.

    The simulation loop unpacks the pair as
    ``(ego_x_position, ego_y_position)``.
    """
    ego_x_position = 400
    ego_y_position = 120
    return ego_x_position, ego_y_position

controlling based on conv_LSTM_model

In [None]:
# Camera-image -> BEV model.
# NOTE(review): the object returned by torch.load replaces the freshly built
# instance, so the checkpoint presumably stores a whole pickled module rather
# than a state_dict - confirm against how the file was saved.
image_to_bev_model = BEVToImage()
image_to_bev_model = torch.load('models/BEVToImage.pth', map_location=torch.device('cpu'))
# Inference only: put dropout / batch-norm style layers into evaluation mode.
image_to_bev_model.eval()

# ConvLSTM driver model (same load pattern as above).
input_dim, hidden_dim, num_layers, output_dim, seq_length = 1, 64, 2, 2, 5
driver_model = ConvLSTM(input_dim, hidden_dim, num_layers, output_dim, seq_length)
driver_model = torch.load('models/ConvLSTM.pth', map_location=torch.device('cpu'))
driver_model.eval()

In [None]:
def model_output_generation(image, driver_model):
    """Predict with ``driver_model`` from a single camera image via the BEV model.

    The image is scaled to [0, 1], given a leading batch axis and moved from
    channel-last to channel-first order, then passed through the global
    ``image_to_bev_model``. Three leading singleton axes are added to the BEV
    output before ``driver_model`` is called; the squeezed result is returned.
    """
    pixels = np.array(image, dtype = np.float32) / 255.0
    frame = torch.tensor(pixels, dtype=torch.float32).unsqueeze(0).permute(0, 3, 1, 2)

    bev = image_to_bev_model(frame)
    # Prepend three singleton axes, one at a time.
    for _ in range(3):
        bev = bev.unsqueeze(0)

    prediction = driver_model(bev)
    return prediction.squeeze()

In [None]:
def points():
    """Return the fixed reference point ``(400, 120)`` used with the ConvLSTM driver.

    The simulation loop unpacks the pair as
    ``(ego_x_position, ego_y_position)``.
    """
    reference_point = (400, 120)
    return reference_point

controlling based on CNN_wp_model

In [None]:
# Camera-image -> BEV model.
# NOTE(review): the object returned by torch.load replaces the freshly built
# instance, so the checkpoint presumably stores a whole pickled module rather
# than a state_dict - confirm against how the file was saved.
image_to_bev_model = BEVToImage()
image_to_bev_model = torch.load('models/BEVToImage.pth', map_location=torch.device('cpu'))
# Inference only: put dropout / batch-norm style layers into evaluation mode.
image_to_bev_model.eval()

# CNN waypoint driver model (same load pattern as above).
driver_model = CNN_WPModel()
driver_model = torch.load('models/CNN_WPModel.pth', map_location=torch.device('cpu'))
driver_model.eval()

In [None]:
def get_road_map(vehicle, world):
    """Return the topology of the map currently loaded in ``world``.

    Earlier revisions also queried the vehicle location, its nearest
    waypoint, its geolocation and a dense 2 m waypoint grid, but none of
    those results were used; the unused simulator calls were dropped.

    Args:
        vehicle: Kept for backward compatibility with existing callers; not
            used.
        world: Object exposing ``get_map()``, whose result exposes
            ``get_topology()``.

    Returns:
        Whatever ``get_topology()`` returns; ``make_graph`` consumes it as an
        iterable of ``(start_waypoint, end_waypoint)`` pairs.
    """
    # Named road_map rather than `map` to avoid shadowing the builtin.
    road_map = world.get_map()
    return road_map.get_topology()

In [None]:
def distance(loc1, loc2):
    """Return the Euclidean distance between two objects with ``x``, ``y``, ``z`` attributes."""
    squared = sum(
        (getattr(loc1, axis) - getattr(loc2, axis)) ** 2
        for axis in ("x", "y", "z")
    )
    return math.sqrt(squared)

In [None]:
def make_graph(junction_waypoints):
    """Build a directed adjacency dict from topology waypoint pairs.

    Nodes are ``(x, y, z)`` tuples. The current position of the global
    ``ego`` is added as a node with a zero-cost edge to the start waypoint of
    the first pair. Every ``(start, end)`` pair contributes an edge weighted
    by ``distance(start, end)``.

    Args:
        junction_waypoints: Sequence of ``(start_wp, end_wp)`` pairs whose
            items expose ``transform.location`` with ``x``/``y``/``z``.

    Returns:
        ``{node: [(neighbour, weight), ...]}``. Every node that appears in an
        edge - including pure end points - is a key, so consumers can index
        the dict by any neighbour. With an empty input only the ego node
        (with no edges) is returned.
    """
    def _key(location):
        # Hashable node id for a location-like object.
        return (location.x, location.y, location.z)

    # Read the ego transform once so all three coordinates belong to the
    # same snapshot of the vehicle position.
    ego_loc = _key(ego.get_transform().location)
    graph = {ego_loc: []}

    if not junction_waypoints:
        return graph

    first_start = junction_waypoints[0][0].transform.location
    graph[ego_loc].append((_key(first_start), 0.0))

    for start_wp, end_wp in junction_waypoints:
        start_location = start_wp.transform.location
        end_location = end_wp.transform.location
        start_loc = _key(start_location)
        end_loc = _key(end_location)

        graph.setdefault(start_loc, []).append((end_loc, distance(start_location, end_location)))
        # Register the end point too, even if nothing leaves it.
        graph.setdefault(end_loc, [])

    return graph

In [None]:
def dijkstra(graph, start):
    """Single-source shortest paths over an adjacency dict.

    Args:
        graph: ``{node: [(neighbour, weight), ...]}``. Neighbours do not need
            to be keys of ``graph`` themselves (previously such a neighbour
            raised ``KeyError``).
        start: Source node.

    Returns:
        ``(distances, previous_nodes)``. ``distances`` maps every key of
        ``graph`` plus every reached node to its shortest distance from
        ``start`` (``inf`` for unreached keys). ``previous_nodes`` maps each
        reached node other than ``start`` to its predecessor on a shortest
        path.
    """
    unreachable = float('inf')
    queue = [(0, start)]
    distances = {node: unreachable for node in graph}
    distances[start] = 0

    previous_nodes = {}

    while queue:
        current_distance, current_node = heapq.heappop(queue)
        # A shorter route to this node was already processed; skip the stale
        # heap entry instead of re-relaxing its edges.
        if current_distance > distances.get(current_node, unreachable):
            continue
        for neighbor, weight in graph.get(current_node, []):
            new_distance = current_distance + weight
            if new_distance < distances.get(neighbor, unreachable):
                distances[neighbor] = new_distance
                previous_nodes[neighbor] = current_node
                heapq.heappush(queue, (new_distance, neighbor))

    return distances, previous_nodes

In [None]:
def reconstruct_path(previous_nodes, start, goal):
    """Walk the predecessor map back from ``goal`` to ``start``.

    Returns:
        The node list ``[start, ..., goal]``, or ``None`` when the chain of
        predecessors ends before ``start`` is reached.
    """
    # Collected goal-first; reversed on return.
    trail = [goal]
    while trail[-1] != start:
        parent = previous_nodes.get(trail[-1])
        if parent is None:
            return None
        trail.append(parent)
    return trail[::-1]

In [None]:
# Plan a route from the ego vehicle's current position to a fixed goal node.
curr_ego_loc = ego.get_transform().location
initiation = (curr_ego_loc.x, curr_ego_loc.y, curr_ego_loc.z)
# Hard-coded goal node. reconstruct_path looks nodes up by exact tuple
# equality, so this presumably has to coincide with a waypoint location of
# the loaded map - verify when changing maps.
goal = (158.0238037109375, 46.202396392822266, 0.0)

# `wps` is the adjacency dict returned by make_graph, built from the map topology.
wps = make_graph(get_road_map(ego, world))
distances, prev_nodes = dijkstra(wps, initiation)
path = reconstruct_path(prev_nodes, initiation, goal)

# `path` is None when no predecessor chain links `goal` back to `initiation`.
print("Shortest path:", path)

In [None]:
def model_output_generation(image, model, locs = 400):
    """Predict with ``model`` from a single camera image via the BEV model.

    Args:
        image: Camera frame convertible by ``np.array``; assumed channel-last
            (H, W, C), since it is permuted to channel-first below.
        model: Driver model, called as ``model(bev, locs_tensor)``. The
            earlier version ignored this argument and always used the global
            ``driver_model``.
        locs: Scalar passed to the model as a float32 tensor.

    Returns:
        The model output with singleton dimensions squeezed out.
    """
    np_image = np.array(image, dtype = np.float32) / 255.0
    tensor_image = torch.tensor(np_image, dtype=torch.float32)
    tensor_image = tensor_image.permute(2,0,1).unsqueeze(0)
    bev_op = image_to_bev_model(tensor_image)

    # Add a leading axis, swap the last two axes, then tile three copies
    # along a new channel axis.
    bev_maps = bev_op.unsqueeze(0).permute(0, 2, 1)
    bev = bev_maps.repeat(1, 3, 1, 1)

    outputs = model(bev, torch.tensor(locs, dtype = torch.float32))
    return outputs.squeeze()

In [None]:
def points():
    """Return the fixed reference point ``(400, 120)`` used with the CNN driver.

    The simulation loop unpacks the pair as
    ``(ego_x_position, ego_y_position)``.
    """
    x_position, y_position = 400, 120
    return (x_position, y_position)

RUNNING THE SIMULATOR

In [None]:
# Drive the ego vehicle with `driver_model` for 60 s of wall-clock time and
# report route-completion (RC), infraction (IS) and driving (DS) scores.
try:
    print("started")

    prev_image_captured = None
    image_captured = None

    start_time = time.time()

    RC = 0
    IS = 0
    DS = 0
    rcc = 0
    isc = 100

    # Path-length accumulator. Deliberately not called `distance`, which
    # would overwrite the distance() helper used by make_graph.
    distance_travelled = 0.0
    speed_limit = 50
    red_light_violations = False
    collision_sensor = CollisionSensor(ego)
    init_pos = ego.get_transform().location
    fin_pos = init_pos

    set_cam_to_EGO(ego)
    camera.listen(camera_to_image) #changes image_captured globally

    # Wait for the first camera frame.
    while image_captured is None:
        world.tick()

    while True:
        world.wait_for_tick()
        # Only act when a new frame has arrived.
        if not prev_image_captured == image_captured:

            scene_x, scene_y = 800, 600
            ego_x_mid = scene_x/2
            ego_y_mid = scene_y/2

            outputs = model_output_generation(image_captured, driver_model)
            waypoint = abs(outputs.detach().numpy())

            ego_x_position, ego_y_position = points()

            intended_x = waypoint[0]
            intended_y = waypoint[1]

            # Distance covered since the previously processed frame.
            curr_pos = ego.get_transform().location
            dx = curr_pos.x - fin_pos.x
            dy = curr_pos.y - fin_pos.y
            dz = curr_pos.z - fin_pos.z
            d = math.sqrt((dx ** 2) + (dy ** 2) + (dz ** 2))
            distance_travelled = distance_travelled + d
            # Advance the reference point; without this every step was
            # measured from the start position and the total was not a
            # path length.
            fin_pos = curr_pos

            evaluate_RC(intended_x, ego_x_position, intended_y, ego_y_position)
            evaluate_IS(speed_limit, red_light_violations, collision_sensor)

            throttle, brake = TandB(ego_y_mid, intended_y, ego_y_position)
            velocity = ego.get_velocity()
            speed = math.sqrt((velocity.x**2 + velocity.y**2 + velocity.z**2)) / 5
            s_power = (1 - brake) * (1 - speed)
            steer = steering(ego_x_position, intended_x, s_power)

            print(f"\rpredicted: {waypoint}, going towards: ({intended_x:.1f}, {intended_y:.1f}), with b:{brake}, t:{throttle:.1f}, s:{steer:.1f}      ", end = "")

            ego.apply_control(control_ego(steer, throttle, brake))
            set_cam_to_EGO(ego)

        if(time.time() >= start_time + 60):
            break
        prev_image_captured = image_captured

    if(distance_travelled > 0):
        RC = rcc * 100 / distance_travelled
        IS = isc * 100 / distance_travelled
    evaluate_DS()
    # evaluate_DS publishes its result as `dsc`; mirror it into the name
    # that is reported below.
    DS = dsc

    print(f"\nRC is: {RC:.2f}, DS is: {DS:.2f}, IS is: {IS:.2f}")

except Exception as e:
    # Include the exception type: some exceptions (e.g. KeyError) have a
    # message that is meaningless on its own.
    print(f"\n{type(e).__name__}: {e}")
finally:
    camera.stop()

    # Full brake, then reset the scene for the next run.
    ego.apply_control(control_ego(0,0,1))

    remove_all_actors()
    initiating_carla(world, blueprint_library)
    set_cam_to_EGO(ego, 65)

    print("stopped")

In [None]:
# Manual check. The simulation loop calls control_ego(steer, throttle, brake),
# so this is steer=1, throttle=1, brake=0.
ego.apply_control(control_ego(1,1,0))

In [None]:
# Manual check. The simulation loop calls control_ego(steer, throttle, brake),
# so this is steer=0, throttle=0, brake=1 (same command the run cell issues on exit).
ego.apply_control(control_ego(0,0,1))