In [96]:
# Dependencies 
# !pip3 install torch torchvision torchaudio
# !pip3 install nuscenes-devkit

In [102]:
import os
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, random_split
import torch.optim as optim
from PIL import Image
import torch.nn.functional as F
from IPython.display import Image as IPImage
from scipy import misc

In [98]:
from nuscenes.can_bus.can_bus_api import NuScenesCanBus
from nuscenes.nuscenes import NuScenes
# Root directory of the nuScenes v1.0-mini download. NuScenesCanBus is pointed at
# the same root below. Set the NUSCENES_DATAROOT environment variable to use a
# different location without editing the notebook; the original path is the default.
PATH = os.environ.get('NUSCENES_DATAROOT', '/Users/jonathanmorris/Downloads/v1.0-mini')

nusc = NuScenes(version='v1.0-mini', dataroot=PATH, verbose=True)
nusc_can = NuScenesCanBus(dataroot=PATH)
# Image size (in pixels) that the network's forward pass reshapes its input to.
HEIGHT = 70
WIDTH = 320

Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.739 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.


In [99]:
def get_closest_can(time, can_objects, max_diff=1000000):
    """Return the CAN message that most closely follows ``time``.

    Only messages whose ``utime`` is strictly later than ``time`` and less than
    ``max_diff`` after it are considered; among those, the one with the
    smallest gap wins (the first one seen on ties).

    Args:
        time: Reference timestamp, in the same unit as the messages' ``utime``.
        can_objects: Iterable of dicts that each carry a ``"utime"`` key.
        max_diff: Exclusive upper bound on ``utime - time``. The default of
            1000000 corresponds to one second when timestamps are microseconds.

    Returns:
        The matching message dict, or an empty dict ``{}`` when no message
        falls inside the window. Callers compare against ``{}`` to detect this.
    """
    closest = {}
    best_diff = max_diff
    for message in can_objects:
        diff = message["utime"] - time
        # Strictly-after and strictly-inside-window, then strictly better than the current best.
        if 0 < diff < best_diff:
            closest = message
            best_diff = diff
    return closest

def num_to_range(num, inMin, inMax, outMin, outMax):
    """Linearly rescale ``num`` from the interval [inMin, inMax] to [outMin, outMax].

    Values outside the input interval are extrapolated, not clamped. Raises
    ZeroDivisionError when ``inMax == inMin``.
    """
    in_span = float(inMax - inMin)
    out_span = outMax - outMin
    fraction = float(num - inMin) / in_span
    return outMin + (fraction * out_span)

def normalize_vehicle_monitor_can(can_obj):
    """Rescale the brake, steering and throttle readings of one CAN message.

    Each raw reading is mapped linearly from its raw range to a normalized
    range and then rounded to one decimal place:

        brake:    [0, 126]      -> [0, 1]
        steering: [-780, 779.9] -> [-1, 1]
        throttle: [0, 1000]     -> [0, 1]

    Args:
        can_obj: Mapping with "brake", "steering" and "throttle" entries.

    Returns:
        A new dict holding only the three normalized values, keyed
        "brake", "steering", "throttle" (in that order).

    Raises:
        KeyError: if ``can_obj`` lacks one of the three keys (e.g. an empty dict).
    """
    # (key, raw minimum, raw maximum, normalized minimum, normalized maximum)
    signal_ranges = (
        ("brake", 0, 126, 0, 1),
        ("steering", -780, 779.9, -1, 1),
        ("throttle", 0, 1000, 0, 1),
    )
    return {
        key: round(num_to_range(can_obj[key], raw_lo, raw_hi, out_lo, out_hi), 1)
        for key, raw_lo, raw_hi, out_lo, out_hi in signal_ranges
    }

In [None]:
class NuScenesImageDataset(Dataset):
    """Front-camera frames paired with normalized vehicle-control readings.

    On construction every scene in ``nusc.scene`` is walked sample by sample.
    For each sample the CAM_FRONT image is loaded and resized to
    WIDTH x HEIGHT pixels, and the ``vehicle_monitor`` CAN message that most
    closely follows the camera timestamp is normalized and stored as target.

    Scenes listed in ``nusc_can.can_blacklist`` and samples without a CAN
    message inside the matching window are skipped, so images and targets
    always stay aligned index by index.

    Args:
        nusc: Loaded ``NuScenes`` instance.
        nusc_can: Loaded ``NuScenesCanBus`` instance for the same data root.
        transform: Optional callable applied to the PIL image in
            ``__getitem__``. Without one, items hold raw PIL images, which
            the default DataLoader collation cannot batch.
    """
    def __init__(self, nusc, nusc_can, transform=None):
        self.nusc = nusc
        self.nusc_can = nusc_can
        self.transform = transform
        self.x_image_data = []
        self.y_vehicle_data = []

        for scene in nusc.scene:
            # Scene names look like "scene-0061"; the CAN blacklist holds the numeric part.
            scene_number = int(scene['name'].split("-")[1])
            if scene_number in nusc_can.can_blacklist:
                continue

            # CAN messages are looked up per scene and message type, once per scene.
            can_objects = nusc_can.get_messages(scene['name'], 'vehicle_monitor')

            sample_token = scene['first_sample_token']
            while sample_token != '':
                sample_record = nusc.get('sample', sample_token)
                sample_token = sample_record['next']

                sample_data = nusc.get('sample_data', sample_record['data']["CAM_FRONT"])
                closest_can = get_closest_can(sample_data['timestamp'], can_objects)
                if closest_can == {}:
                    # No label available for this frame; skip it instead of failing on missing keys.
                    continue

                image_path = nusc.get_sample_data_path(sample_data['token'])
                # PIL's resize takes (width, height); resize returns a new image,
                # so the file handle can be closed right away.
                with Image.open(image_path) as image:
                    resized = image.resize((WIDTH, HEIGHT))

                self.x_image_data.append(resized)
                self.y_vehicle_data.append(normalize_vehicle_monitor_can(closest_can))

        # Kept for backward compatibility; now counts samples rather than scenes.
        self.len = len(self.x_image_data)

    def __getitem__(self, index):
        """Return ``(image, target)`` for ``index``; image is transformed if a transform was given."""
        image = self.x_image_data[index]
        if self.transform is not None:
            image = self.transform(image)
        return image, self.y_vehicle_data[index]

    def __len__(self):
        """Return the number of stored (image, target) pairs."""
        return len(self.x_image_data)

In [None]:
# https://github.com/Zhenye-Na/e2e-learning-self-driving-cars/blob/master/src/train.ipynb

class NetworkNvidia(nn.Module):
    """Convolutional regressor modeled on the NVIDIA end-to-end driving network.

    Maps a batch of 3-channel HEIGHT x WIDTH images to three values per image.
    The train() function in this notebook fits those three outputs to
    [steering, throttle, brake].
    """

    def __init__(self):
        """Build the convolutional and fully connected stacks.

        Layers as implemented:
            Convolution: 5x5, filter: 24, strides: 2x2, activation: ELU
            Convolution: 5x5, filter: 36, strides: 2x2, activation: ELU
            Convolution: 5x5, filter: 48, strides: 2x2, activation: ELU
            Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
            Convolution: 3x3, filter: 64, strides: 1x1 (no activation)
            Drop out (0.5)
            Fully connected: 64 * 2 * 33 -> 100, activation: ELU
            Fully connected: 100 -> 50, activation: ELU
            Fully connected: 50 -> 10 (no activation)
            Fully connected: 10 -> 3 (output)

        The flattened size 64 * 2 * 33 is what the convolution stack yields
        for a 70 x 320 input, so it is tied to the HEIGHT/WIDTH constants.

        NOTE(review): the last convolution and the 10-unit layer have no ELU
        after them, unlike the other layers. Confirm whether that is intended
        before changing it: inserting modules into ``linear_layers`` shifts
        the Sequential indices and therefore the state_dict keys of saved
        checkpoints.
        """
        super(NetworkNvidia, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 24, 5, stride=2),
            nn.ELU(),
            nn.Conv2d(24, 36, 5, stride=2),
            nn.ELU(),
            nn.Conv2d(36, 48, 5, stride=2),
            nn.ELU(),
            nn.Conv2d(48, 64, 3),
            nn.ELU(),
            nn.Conv2d(64, 64, 3),
            nn.Dropout(0.5)
        )
        self.linear_layers = nn.Sequential(
            nn.Linear(in_features=64 * 2 * 33, out_features=100),
            nn.ELU(),
            nn.Linear(in_features=100, out_features=50),
            nn.ELU(),
            nn.Linear(in_features=50, out_features=10),
            nn.Linear(in_features=10, out_features=3)
        )

    def forward(self, input):
        """Run the network on a batch of images.

        Args:
            input: Tensor that can be viewed as (batch, 3, HEIGHT, WIDTH),
                where batch is taken from ``input.size(0)``.

        Returns:
            Tensor of shape (batch, 3).
        """
        # Force the expected layout; HEIGHT and WIDTH are module-level constants.
        input = input.view(input.size(0), 3, HEIGHT, WIDTH)
        output = self.conv_layers(input)
        # print(output.shape)
        # Flatten all feature maps per image before the fully connected layers.
        output = output.view(output.size(0), -1)
        output = self.linear_layers(output)
        return output

In [None]:
# Training hyperparameters.
learning_rate = 0.001
batch_size = 64
epochs = 10
model_path = os.path.join(os.getcwd(), "models")

# Create models/ and models/epochs/ for the checkpoints. Unlike a shell
# `mkdir` chain this is safe to re-run and still creates epochs/ when
# models/ already exists.
os.makedirs(os.path.join(model_path, "epochs"), exist_ok=True)

print(model_path)
# Resize to the network's input size (height, width) and convert to a float tensor.
transform = transforms.Compose(
        [transforms.Resize((HEIGHT, WIDTH), antialias=True), transforms.ToTensor()]
    )
model_number = "1.2"
model_file_name = f"nuscenes_model_v{model_number}.pth"

In [None]:
def _select_device():
    """Pick CUDA when available, otherwise Apple MPS when available, otherwise CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")
    return torch.device("cpu")


def _scene_examples(scene_tokens, device, phase):
    """Yield one (image, label) tensor pair per usable CAM_FRONT sample of the given scenes.

    ``phase`` is only used in the progress message ("Training" / "Validating").
    Scenes on the CAN blacklist and samples without a matching vehicle_monitor
    message are skipped.
    """
    for scene_token in scene_tokens:
        scene = nusc.get('scene', scene_token)
        print(f"{phase} on scene " + scene['name'])
        scene_number = int(scene['name'].split("-")[1])

        if scene_number in nusc_can.can_blacklist:
            print("Skipping scene " + str(scene_number))
            continue

        scene_vehicle_monitor = nusc_can.get_messages(scene['name'], 'vehicle_monitor')

        sample_token = scene['first_sample_token']
        while sample_token != '':
            current_sample = nusc.get('sample', sample_token)
            sample_token = current_sample['next']

            current_vehicle_can = get_closest_can(current_sample["timestamp"], scene_vehicle_monitor)
            if current_vehicle_can == {}:
                # No CAN reading close enough to this sample: nothing to learn from.
                continue

            cam_front_data = nusc.get("sample_data", current_sample["data"]["CAM_FRONT"])
            current_image_path = PATH + "/" + cam_front_data["filename"]
            # The transform reads the pixel data, so the file can be closed right after.
            with Image.open(current_image_path) as img:
                img_input = transform(img).to(device)
            img_input = img_input.view(1, 3, HEIGHT, WIDTH)

            normal_vm_can = normalize_vehicle_monitor_can(current_vehicle_can)
            # Shape (1, 3) to match the network output exactly; a (3,) target
            # would only line up with the (1, 3) output through broadcasting.
            label = torch.tensor(
                [[normal_vm_can['steering'], normal_vm_can['throttle'], normal_vm_can['brake']]],
                dtype=torch.float32,
                device=device,
            )
            yield img_input, label


def train():
    """Train NetworkNvidia on the nuScenes scenes and save its weights.

    The scenes in ``nusc.scene`` are split 80/20 into training and validation
    scenes. Within a scene the model is updated once per sample (one image at
    a time); ``batch_size`` therefore groups scenes, not images.

    After every epoch the mean training and validation loss over all processed
    samples is printed (``nan`` when no sample was processed) and a checkpoint
    is written to ``<model_path>/epochs/model_e<N>.pth``. The final weights go
    to ``<model_path>/<model_file_name>``.
    """
    print(f"Model Version v{model_number}")
    print("final model weights will be saved to: " + model_path)

    device = _select_device()

    scenes = nusc.scene

    train_size = int(0.8 * len(scenes))
    val_size = len(scenes) - train_size
    train_scenes, val_scenes = random_split(scenes, [train_size, val_size])

    # Each batch is a dict of lists (default collation of the scene records);
    # only the scene tokens are used.
    train_loader = DataLoader(train_scenes, batch_size=batch_size, shuffle=True)
    # Order is irrelevant for evaluation, so validation scenes are not shuffled.
    val_loader = DataLoader(val_scenes, batch_size=batch_size, shuffle=False)

    net = NetworkNvidia().to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # Make sure the checkpoint directory exists even if the config cell was skipped.
    epoch_dir = os.path.join(model_path, "epochs")
    os.makedirs(epoch_dir, exist_ok=True)

    print(f"Training on {device}")

    for epoch in range(epochs):
        net.train()
        train_loss_sum, train_count = 0.0, 0
        for batch in train_loader:
            for img_input, label in _scene_examples(batch['token'], device, "Training"):
                optimizer.zero_grad()

                # Forward pass
                outputs = net(img_input)

                # Compute loss
                total_loss = criterion(outputs, label)

                # Backward pass
                total_loss.backward()

                # Update weights
                optimizer.step()

                train_loss_sum += total_loss.item()
                train_count += 1

        # Validation
        net.eval()
        val_loss_sum, val_count = 0.0, 0
        with torch.no_grad():
            for batch in val_loader:
                for img_input, label in _scene_examples(batch['token'], device, "Validating"):
                    val_loss_sum += criterion(net(img_input), label).item()
                    val_count += 1

        # Report means over the epoch rather than the last sample's loss.
        train_loss = train_loss_sum / train_count if train_count else float("nan")
        val_loss = val_loss_sum / val_count if val_count else float("nan")
        print(
            f"Epoch {epoch + 1}/{epochs}, Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}"
        )
        torch.save(
            net.state_dict(),
            os.path.join(epoch_dir, f"model_e{epoch+1}.pth"),
        )

    print("Finished training")
    torch.save(net.state_dict(), os.path.join(model_path, model_file_name))

In [None]:
# Run the training loop. It writes one checkpoint per epoch under
# <model_path>/epochs and the final weights to <model_path>/<model_file_name>.
train()

In [None]:
# Inspect the first scene: load its vehicle_monitor CAN messages and its first
# front-camera frame, prepared as a single-image batch for the model.
my_scene = nusc.scene[0]
my_scene_vehicle_monitor_can = nusc_can.get_messages(my_scene['name'], 'vehicle_monitor')
# Start at the scene's first sample so this cell works on a fresh kernel;
# later cells move my_sample forward through the 'next' links.
my_sample = nusc.get('sample', my_scene['first_sample_token'])
sensor = 'CAM_FRONT'
cam_front_data = nusc.get('sample_data', my_sample['data'][sensor])
current_image_path = PATH+"/"+cam_front_data['filename']
img = Image.open(current_image_path)
image_tensor = transform(img)
image_tensor = image_tensor.view(1, 3, HEIGHT, WIDTH)
IPImage(filename=current_image_path)

In [None]:
# Show the full path of the final weights file that train() writes and the next cells load.
os.path.join(model_path, model_file_name)

In [None]:
# Ground truth for the current sample: the vehicle_monitor message that most
# closely follows its timestamp, normalized the same way as the training labels.
# NOTE: get_closest_can returns {} when no message is within its window, in
# which case the normalization below raises KeyError.
closest_can = get_closest_can(my_sample['timestamp'], my_scene_vehicle_monitor_can)
normalize_vehicle_monitor_can(closest_can)

In [None]:
# Load the trained weights into a fresh network and predict for the current frame.
model = NetworkNvidia()
# The model and image_tensor both live on the CPU here, so map the checkpoint
# to the CPU as well; this also works on machines without an MPS device.
model.load_state_dict(torch.load(os.path.join(model_path, model_file_name), map_location=torch.device('cpu')))
model.eval()
# Inference only: no gradients needed.
with torch.no_grad():
    prediction = model(image_tensor)
prediction

In [None]:
# Move five samples further along the scene, then load that sample's
# front-camera frame as a single-image batch.
# NOTE: each run of this cell advances my_sample again.
for _hop in range(5):
    my_sample = nusc.get('sample', my_sample['next'])
sensor = 'CAM_FRONT'
cam_front_data = nusc.get('sample_data', my_sample['data'][sensor])
current_image_path = PATH+"/"+cam_front_data['filename']
img = Image.open(current_image_path)
image_tensor = transform(img).view(1, 3, 70, 320)
IPImage(filename=current_image_path)

In [None]:
# Ground truth for the current sample: the vehicle_monitor message that most
# closely follows its timestamp, normalized the same way as the training labels.
# NOTE: get_closest_can returns {} when no message is within its window, in
# which case the normalization below raises KeyError.
closest_can = get_closest_can(my_sample['timestamp'], my_scene_vehicle_monitor_can)
normalize_vehicle_monitor_can(closest_can)

In [None]:
# Model prediction for the frame loaded above, to compare with the normalized CAN values.
model(image_tensor)

In [None]:
# Move another five samples along the scene, then load that sample's
# front-camera frame as a single-image batch.
# NOTE: each run of this cell advances my_sample again.
for _hop in range(5):
    my_sample = nusc.get('sample', my_sample['next'])
sensor = 'CAM_FRONT'
cam_front_data = nusc.get('sample_data', my_sample['data'][sensor])
current_image_path = PATH+"/"+cam_front_data['filename']
img = Image.open(current_image_path)
image_tensor = transform(img).view(1, 3, 70, 320)
IPImage(filename=current_image_path)

In [None]:
# Ground truth for the current sample: the vehicle_monitor message that most
# closely follows its timestamp, normalized the same way as the training labels.
# NOTE: get_closest_can returns {} when no message is within its window, in
# which case the normalization below raises KeyError.
closest_can = get_closest_can(my_sample['timestamp'], my_scene_vehicle_monitor_can)
normalize_vehicle_monitor_can(closest_can)

In [None]:
# Model prediction for the frame loaded above, to compare with the normalized CAN values.
model(image_tensor)