# Testing neural networks on NGSIM dataset!

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
import torchvision.models as models
from torchvision.models.resnet import resnet50
import torch.nn.functional as F
from tqdm import tqdm
import os
import random
import pandas as pd
import pickle
# Seed every random number generator the notebook uses from one constant so
# that a fresh "Restart & Run All" reproduces the same randomly sampled frames.
SEED = 228
np.random.seed(SEED)
random.seed(SEED)
# torch.random.manual_seed is the same function as torch.manual_seed,
# so a single call seeds torch's default generator.
torch.manual_seed(SEED)


  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x2341e321a70>

In [2]:
# Name of the NGSIM trajectory CSV export; no directory is given, so it is
# resolved against the notebook's working directory.
filename = "Next_Generation_Simulation__NGSIM__Vehicle_Trajectories_and_Supporting_Data.csv"
# Read the whole table into one DataFrame; later cells filter it by "Location".
NGSIM = pd.read_csv(filename)

  NGSIM = pd.read_csv(filename)


In [3]:
def string2float(my_string):
    """Convert a number written with thousands separators to a float.

    Inputs:
        my_string: text such as "1,234.5"; values that are not strings
            (ints/floats that were already parsed) are accepted as well
    Returns:
        the value as a Python float

    Accepting non-string values makes the conversion idempotent: applying it
    to a column that has already been converted no longer raises
    AttributeError, so the calling cell can safely be re-run.
    """
    if isinstance(my_string, str):
        # Commas are only grouping characters here; drop them before parsing.
        my_string = my_string.replace(',', '')
    return float(my_string)
# Replace the comma-grouped "Local_Y" text values with floats, element by element.
NGSIM["Local_Y"] = NGSIM["Local_Y"].map(string2float)

Vehicle_ID         int64
Frame_ID           int64
Total_Frames      object
Global_Time        int64
Local_X          float64
Local_Y          float64
Global_X         float64
Global_Y         float64
v_length         float64
v_Width          float64
v_Class            int64
v_Vel            float64
v_Acc            float64
Lane_ID           object
O_Zone           float64
D_Zone           float64
Int_ID           float64
Section_ID       float64
Direction        float64
Movement         float64
Preceding         object
Following         object
Space_Headway     object
Time_Headway      object
Location          object
dtype: object


# Dataset info:
11,850,526 data points
<br>
<b>Labels:</b> "Vehicle_ID" "Frame_ID" "Total_Frames" "Global_Time" "Local_X" "Local_Y" "Global_X"
                             "Global_Y" "v_length" "v_Width" "v_Class" "v_Vel" "v_Acc" "Lane_ID" "O_Zone" "D_Zone" "Int_ID"
                             "Section_ID" "Direction" "Movement" "Preceding" "Following" "Space_Headway" "Time_Headway" "Location"
<br>
<b>Locations:</b> "us-101" "i-80" "lankershim" "peachtree"

In [94]:
def get_id_list(data, location):
    '''
    Return the sorted unique vehicle IDs found in `data`, cached on disk.

    Inputs:
        data: NGSIM subset data according to location (pandas DataFrame)
        location: location (string); the cache file is named
            location + "_id_list"
    Returns:
        id_list: the distinct "Vehicle_ID" values in ascending order (list)

    The cache is keyed on `location` only: when the cache file can be read
    its content is returned as-is, even if `data` has changed since it was
    written. Delete the file to force a rebuild.
    '''
    filename = location + "_id_list"
    try:
        # NOTE: pickle.load can execute arbitrary code; only load cache files
        # that this notebook wrote itself.
        with open(filename, "rb") as fp:
            return pickle.load(fp)
    except (OSError, EOFError, pickle.UnpicklingError):
        # Cache missing, unreadable or truncated: rebuild it below. Anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        pass

    # Vectorised de-duplication; the row-by-row scan it replaces was quadratic.
    id_list = sorted(data["Vehicle_ID"].unique().tolist())
    with open(filename, "wb") as fp:
        pickle.dump(id_list, fp)

    return id_list

def filter_extra_IDs(vehicle_data):
    '''
    Keep only the rows whose "Total_Frames" equals the largest value present.

    Inputs:
        vehicle_data: rows selected for one "Vehicle_ID" (pandas DataFrame)
    Returns:
        the subset of rows carrying the maximum "Total_Frames"; an empty
        frame is returned unchanged

    "Total_Frames" is loaded with object dtype, i.e. it can hold text such as
    "1,019" next to plain integers. The values are therefore compared
    numerically (thousands separators removed) rather than with max() on the
    raw objects, which would order strings lexicographically ("884" > "1,019")
    or fail on a str/int mix.
    Presumably the same Vehicle_ID can label more than one trajectory and the
    longest one is the one wanted - TODO confirm against the dataset notes.
    '''
    if vehicle_data.shape[0] == 0:
        return vehicle_data

    frame_counts = pd.to_numeric(
        vehicle_data["Total_Frames"].astype(str).str.replace(",", "", regex=False)
    )
    return vehicle_data.loc[frame_counts == frame_counts.max()]

def graph_lane_vs_frame(vehicle_data):
    '''
    Scatter-plot lane ID and lateral position against frame number.

    Inputs:
        vehicle_data: DataFrame with "Frame_ID", "Lane_ID" and "Local_X" columns
    Draws a new two-row figure with a shared x axis (top: "Lane_ID",
    bottom: "Local_X"); nothing is returned.
    '''
    frame_ids = vehicle_data["Frame_ID"]
    lane_ids = vehicle_data["Lane_ID"].to_numpy(dtype=np.float16)

    _, (lane_axis, position_axis) = plt.subplots(2, 1, sharex=True)

    lane_axis.scatter(frame_ids, lane_ids)
    lane_axis.set_title("Lane ID")

    position_axis.scatter(frame_ids, vehicle_data["Local_X"])
    position_axis.set_title("Local X")
    # The bottom panel is the current axes after subplots(), so labelling it
    # directly matches the pyplot-level xlabel call.
    position_axis.set_xlabel("Frame ID")

def detect_switches(vehicle_data, delete_interval=1.5):
    '''
    Find the frames at which a vehicle's lane ID changes.

    Inputs:
        vehicle_data: rows of one vehicle ordered by "Frame_ID" (pandas DataFrame)
        delete_interval: a switch detected less than this many seconds after
            the previously detected switch is discarded as noise
    Returns:
        the "Frame_ID" values at which "Lane_ID" differs from the previous
        row, after the noise filter and validate_switches (numpy array).
        When no switch survives, a one-element list holding a randomly drawn
        frame is returned so that a sample can still be taken. When the
        trajectory is empty or too short to draw such a frame, an empty list
        is returned.
    '''
    if vehicle_data.shape[0] == 0:
        return []

    frame_ids = vehicle_data["Frame_ID"]
    lane_ids = vehicle_data["Lane_ID"].to_numpy(dtype=int)

    # Row k+1 is a switch when its lane differs from row k.
    switch_index = np.flatnonzero(np.diff(lane_ids)) + 1
    switch_frames = frame_ids.iloc[switch_index].to_numpy()

    # sometimes the lane switch data is fuzzy. Deleting lane switches within <delete_interval> seconds
    if len(switch_frames) >= 2:
        delete_frames = int(delete_interval*10)  # data is sampled at 10 frames per second
        too_close = np.diff(switch_frames) < delete_frames
        # The first switch has no predecessor and is always kept. Boolean-mask
        # indexing is used instead of np.delete, whose handling of boolean
        # arguments changed between numpy versions.
        keep = np.append(True, ~too_close)
        switch_frames = switch_frames[keep]

    switch_frames = validate_switches(switch_frames, vehicle_data)

    # even if no lane change, we still want to sample
    if len(switch_frames) == 0:
        lowest_frame = int(frame_ids.iloc[0]) + 51
        highest_frame = int(frame_ids.iloc[-1]) - 53
        if lowest_frame > highest_frame:
            # Trajectory too short: random.randint would raise ValueError on
            # the empty range, so report that there is nothing to sample.
            return []
        switch_frames = [random.randint(lowest_frame, highest_frame)]
    return switch_frames

def validate_switches(switch_frames, vehicle_data):
    '''
    Drop switch frames that lie too close to either end of the trajectory.

    Inputs:
        switch_frames: candidate "Frame_ID" values (array-like)
        vehicle_data: rows of one vehicle ordered by "Frame_ID" (pandas DataFrame)
    Returns:
        numpy array with the candidates that are more than 53 frames away
        from both the first and the last "Frame_ID" of `vehicle_data`
    '''
    frame_column = vehicle_data["Frame_ID"]
    first_frame = frame_column.iloc[0]
    last_frame = frame_column.iloc[-1]

    candidates = np.asarray(switch_frames)
    # 53 frames of margin; presumably chosen to leave room for the window
    # that extract_features cuts around each switch.
    clear_of_end = np.abs(last_frame - candidates) > 53
    clear_of_start = np.abs(first_frame - candidates) > 53
    return candidates[clear_of_end & clear_of_start]
 
def extract_features(switch_frame, vehicle_data, time_interval=5):
    '''
    Compute motion features in a window centred on a lane switch.

    Inputs:
        switch_frame: singular lane switch frame
        vehicle_data: rows of one vehicle with "Frame_ID", "Local_X" and
            "Local_Y" columns, one row per frame (pandas DataFrame)
        time_interval: how much time in future and past we take into account.
            Default: 5s (50 frames before and after LC)
    Returns:
        [yaw, yaw_rate, lat_vel, lat_accel], each an array of
        2*time_interval/0.1 samples (100 by default) with the lane switch at
        the 51st sample (index 50) for the default window.
        First and second time derivatives are forward differences (for
        example: v_t calculated using x_t and x_(t+1)); because every
        difference shortens the series by one sample, two extra frames are
        read past the end of the window.
            yaw:       arctan2(v_x, v_y), i.e. heading measured from the
                       Local_Y axis
            yaw_rate:  forward difference of yaw
            lat_vel:   forward difference of "Local_X"
            lat_accel: forward difference of lat_vel
    Raises:
        AssertionError when the window does not contain exactly one row per
        frame (missing or duplicated frames).
    '''
    delta_t = 0.1
    # round() guards against float error such as 0.7/0.1 == 6.999...
    half_window = int(round(time_interval / delta_t))
    num_frames = 2 * half_window
    first_frame = int(switch_frame) - half_window
    last_frame = int(switch_frame) + half_window + 2

    # Extracting data within the frame interval (both ends inclusive)
    frame_ids = vehicle_data["Frame_ID"]
    interval_data = vehicle_data.loc[(frame_ids >= first_frame) & (frame_ids <= last_frame)]
    x_pos = interval_data["Local_X"].to_numpy(dtype=np.float32)
    y_pos = interval_data["Local_Y"].to_numpy(dtype=np.float32)
    expected_samples = num_frames + 3  # 103 for the default window
    assert len(x_pos) == expected_samples
    assert len(y_pos) == expected_samples

    # Positional time derivatives
    v_x = np.diff(x_pos) / delta_t
    v_y = np.diff(y_pos) / delta_t

    yaw = np.arctan2(v_x, v_y)
    yaw_rate = np.diff(yaw) / delta_t
    lat_accel = np.diff(v_x) / delta_t

    # The series come out with num_frames+2 / num_frames+1 samples; cut all
    # of them to the documented common length.
    yaw = yaw[:num_frames]
    yaw_rate = yaw_rate[:num_frames]
    lat_vel = v_x[:num_frames]
    lat_accel = lat_accel[:num_frames]

    return [yaw, yaw_rate, lat_vel, lat_accel]

def detect_lane_change(frame, vehicle_data):
    '''
    Classify the lane transition between frame-1 and frame.

    Inputs:
        frame: "Frame_ID" to classify
        vehicle_data: rows of one vehicle with "Frame_ID" and "Lane_ID" columns
    Returns:
        one-hot numpy array: [1 0 0] when the lane ID decreased (left lane
        switch), [0 1 0] when it is unchanged, [0 0 1] when it increased
        (right lane switch). False is returned when either frame is missing
        or appears more than once.
    '''
    frame_ids = vehicle_data["Frame_ID"]
    lane_column = vehicle_data["Lane_ID"]

    lane_before = lane_column[frame_ids == frame - 1].to_numpy(dtype=int)
    lane_now = lane_column[frame_ids == frame].to_numpy(dtype=int)

    # Exactly one row is required on each side of the transition.
    if lane_before.size != 1 or lane_now.size != 1:
        return False

    one_hot_by_direction = {
        -1: [1, 0, 0],
        0: [0, 1, 0],
        1: [0, 0, 1],
    }
    direction = int(np.sign(lane_now[0] - lane_before[0]))
    return np.array(one_hot_by_direction[direction])
        
    

In [None]:
class my_dataset:
    """Zero-initialised feature buffers for lane-change samples (work in progress).

    Inputs:
        num_validation_data: number of samples reserved for validation
        num_training_data: number of samples reserved for training
        num_timesteps: number of time steps stored per sample
    """
    def __init__(self, num_validation_data=300,num_training_data=80,num_timesteps=100):
        self.num_validation_data = num_validation_data
        self.num_training_data = num_training_data
        self.total_num_data = self.num_validation_data + self.num_training_data
        # Features stored per time step; presumably the four series returned
        # by extract_features - TODO confirm.
        self.num_extracted_data = 4
        self.num_timesteps = num_timesteps

        # One (sample, timestep, feature) buffer per turn direction.
        buffer_shape = (self.total_num_data, self.num_timesteps, self.num_extracted_data)
        self.left_turn_data = np.zeros(buffer_shape)
        # The original assignment was left unfinished (a syntax error); the
        # buffer is assumed to mirror left_turn_data - TODO confirm. A buffer
        # for the "no change" class may also be needed.
        self.right_turn_data = np.zeros(buffer_shape)
        

In [95]:
# Full set of locations: ["us-101", "i-80", "lankershim", "peachtree"]
LOCATIONS = ["us-101", "i-80"]

# Running number of samples per class, in the order of detect_lane_change's
# one-hot encoding: [left switch, no change, right switch].
LC_tracker = np.array([0,0,0])
for location in LOCATIONS:
    data = NGSIM.loc[NGSIM["Location"] == location]
    id_list = get_id_list(data, location)

    # Vehicle IDs noted as edge cases while debugging (iterate over one of
    # them instead of id_list to reproduce):
    #   id_list[432] - multiple quick successive lane changes
    #   20           - lane switch close to end
    #   344          - duplicate data for multiple frames
    #   1234         - duplicate and missing data
    for i in tqdm(id_list):
        vehicle_data = data.loc[data["Vehicle_ID"] == i].sort_values("Frame_ID")
        vehicle_data = filter_extra_IDs(vehicle_data)

        switch_frames = detect_switches(vehicle_data)

        for single_switch_frame in switch_frames:
            lane_change = detect_lane_change(single_switch_frame, vehicle_data)
            # False means the transition could not be classified; skip the frame.
            if lane_change is False:
                continue
            yaw, yaw_rate, lat_vel, lat_accel = extract_features(single_switch_frame, vehicle_data)
            LC_tracker = LC_tracker + lane_change

    # Cumulative tally after each location.
    print(LC_tracker)
        
        


100%|██████████| 2847/2847 [07:45<00:00,  6.11it/s]

[ 464 2057  175]





In [89]:
# Debug leftover: `i` is not defined in this cell, so this prints whatever the
# kernel currently holds under that name - presumably the vehicle-ID loop
# variable of the processing cell. It fails on a fresh kernel if run first.
print(i)

344


True
