# Waymo Open Dataset Motion Tutorial

- Website: https://waymo.com/open
- GitHub: https://github.com/waymo-research/waymo-open-dataset

This tutorial demonstrates:
- How to decode and interpret the data.
- How to train a simple model with TensorFlow.

Visit the [Waymo Open Dataset Website](https://waymo.com/open) to download the full dataset.

To use, open this notebook in [Colab](https://colab.research.google.com).

Uncheck the box "Reset all runtimes before running" if you run this colab directly from the remote kernel. Alternatively, you can make a copy before trying to run it by following "File > Save copy in Drive ...".

# Imports and global definitions

In [1]:
# Data location. Please edit.

# A tfrecord containing tf.Example protos as downloaded from the Waymo dataset
# webpage.

# Replace this path with your own tfrecords.
FILENAME = '/home/sghosal/uncompressed_tf_example_training_training_tfexample.tfrecord-00028-of-01000'

In [None]:
import math
import os
import uuid
import time
import torch

from matplotlib import cm
import matplotlib.animation as animation
import matplotlib.pyplot as plt

import numpy as np
from IPython.display import HTML
import itertools
import tensorflow.compat.v1 as tf

from google.protobuf import text_format
from waymo_open_dataset.metrics.ops import py_metrics_ops
from waymo_open_dataset.metrics.python import config_util_py as config_util
from waymo_open_dataset.protos import motion_metrics_pb2

# Example field definition
# Roadgraph features: every entry is a fixed-length feature with 20000 rows
# (one row per roadgraph sample).
roadgraph_features = {
    'roadgraph_samples/dir': tf.io.FixedLenFeature([20000, 3], tf.float32),
    'roadgraph_samples/id': tf.io.FixedLenFeature([20000, 1], tf.int64),
    'roadgraph_samples/type': tf.io.FixedLenFeature([20000, 1], tf.int64),
    'roadgraph_samples/valid': tf.io.FixedLenFeature([20000, 1], tf.int64),
    'roadgraph_samples/xyz': tf.io.FixedLenFeature([20000, 3], tf.float32),
}

# Features of other agents.
# Every feature has 128 rows (one per agent slot).  Per-step features come in
# three time windows: `current` (1 step), `future` (80 steps), `past` (10 steps).
state_features = {
    # Per-agent scalars.
    'state/id': tf.io.FixedLenFeature([128], tf.float32),
    'state/type': tf.io.FixedLenFeature([128], tf.float32),
    'state/is_sdc': tf.io.FixedLenFeature([128], tf.int64),
    'state/tracks_to_predict': tf.io.FixedLenFeature([128], tf.int64),
    # Current step.
    'state/current/bbox_yaw': tf.io.FixedLenFeature([128, 1], tf.float32),
    'state/current/height': tf.io.FixedLenFeature([128, 1], tf.float32),
    'state/current/length': tf.io.FixedLenFeature([128, 1], tf.float32),
    'state/current/timestamp_micros': tf.io.FixedLenFeature([128, 1], tf.int64),
    'state/current/valid': tf.io.FixedLenFeature([128, 1], tf.int64),
    'state/current/vel_yaw': tf.io.FixedLenFeature([128, 1], tf.float32),
    'state/current/velocity_x': tf.io.FixedLenFeature([128, 1], tf.float32),
    'state/current/velocity_y': tf.io.FixedLenFeature([128, 1], tf.float32),
    'state/current/width': tf.io.FixedLenFeature([128, 1], tf.float32),
    'state/current/x': tf.io.FixedLenFeature([128, 1], tf.float32),
    'state/current/y': tf.io.FixedLenFeature([128, 1], tf.float32),
    'state/current/z': tf.io.FixedLenFeature([128, 1], tf.float32),
    # Future steps.
    'state/future/bbox_yaw': tf.io.FixedLenFeature([128, 80], tf.float32),
    'state/future/height': tf.io.FixedLenFeature([128, 80], tf.float32),
    'state/future/length': tf.io.FixedLenFeature([128, 80], tf.float32),
    'state/future/timestamp_micros': tf.io.FixedLenFeature([128, 80], tf.int64),
    'state/future/valid': tf.io.FixedLenFeature([128, 80], tf.int64),
    'state/future/vel_yaw': tf.io.FixedLenFeature([128, 80], tf.float32),
    'state/future/velocity_x': tf.io.FixedLenFeature([128, 80], tf.float32),
    'state/future/velocity_y': tf.io.FixedLenFeature([128, 80], tf.float32),
    'state/future/width': tf.io.FixedLenFeature([128, 80], tf.float32),
    'state/future/x': tf.io.FixedLenFeature([128, 80], tf.float32),
    'state/future/y': tf.io.FixedLenFeature([128, 80], tf.float32),
    'state/future/z': tf.io.FixedLenFeature([128, 80], tf.float32),
    # Past steps.
    'state/past/bbox_yaw': tf.io.FixedLenFeature([128, 10], tf.float32),
    'state/past/height': tf.io.FixedLenFeature([128, 10], tf.float32),
    'state/past/length': tf.io.FixedLenFeature([128, 10], tf.float32),
    'state/past/timestamp_micros': tf.io.FixedLenFeature([128, 10], tf.int64),
    'state/past/valid': tf.io.FixedLenFeature([128, 10], tf.int64),
    'state/past/vel_yaw': tf.io.FixedLenFeature([128, 10], tf.float32),
    'state/past/velocity_x': tf.io.FixedLenFeature([128, 10], tf.float32),
    'state/past/velocity_y': tf.io.FixedLenFeature([128, 10], tf.float32),
    'state/past/width': tf.io.FixedLenFeature([128, 10], tf.float32),
    'state/past/x': tf.io.FixedLenFeature([128, 10], tf.float32),
    'state/past/y': tf.io.FixedLenFeature([128, 10], tf.float32),
    'state/past/z': tf.io.FixedLenFeature([128, 10], tf.float32),
    # Flag used further down to pick the agents the scene is centred on.
    'state/objects_of_interest': tf.io.FixedLenFeature([128], tf.int64),
}

# Traffic light features.  `current` entries cover 1 step and `past` entries
# 10 steps; every feature except the timestamps has 16 columns.
# Each key must appear exactly once: a repeated key in a dict literal silently
# overrides the earlier entry.
traffic_light_features = {
    'traffic_light_state/current/state':
        tf.io.FixedLenFeature([1, 16], tf.int64, default_value=None),
    'traffic_light_state/current/valid':
        tf.io.FixedLenFeature([1, 16], tf.int64, default_value=None),
    'traffic_light_state/current/x':
        tf.io.FixedLenFeature([1, 16], tf.float32, default_value=None),
    'traffic_light_state/current/y':
        tf.io.FixedLenFeature([1, 16], tf.float32, default_value=None),
    'traffic_light_state/current/z':
        tf.io.FixedLenFeature([1, 16], tf.float32, default_value=None),
    'traffic_light_state/past/state':
        tf.io.FixedLenFeature([10, 16], tf.int64, default_value=None),
    'traffic_light_state/past/valid':
        tf.io.FixedLenFeature([10, 16], tf.int64, default_value=None),
    'traffic_light_state/past/x':
        tf.io.FixedLenFeature([10, 16], tf.float32, default_value=None),
    'traffic_light_state/past/y':
        tf.io.FixedLenFeature([10, 16], tf.float32, default_value=None),
    'traffic_light_state/past/z':
        tf.io.FixedLenFeature([10, 16], tf.float32, default_value=None),
    'traffic_light_state/current/id':
        tf.io.FixedLenFeature([1, 16], tf.int64, default_value=None),
    'traffic_light_state/past/id':
        tf.io.FixedLenFeature([10, 16], tf.int64, default_value=None),
    'traffic_light_state/past/timestamp_micros':
        tf.io.FixedLenFeature([10], tf.int64, default_value=None),
    'traffic_light_state/current/timestamp_micros':
        tf.io.FixedLenFeature([1], tf.int64, default_value=None),
}

# Single feature spec combining the roadgraph, agent-state and traffic-light
# groups, in that order.
features_description = {
    **roadgraph_features,
    **state_features,
    **traffic_light_features,
}

In [3]:
# Read the first serialized record of the TFRecord file and decode it with the
# combined feature spec.
dataset = tf.data.TFRecordDataset(FILENAME, compression_type='')
data = next(dataset.as_numpy_iterator())
parsed = tf.io.parse_single_example(data, features_description)
# Second name for the same decoded example; the record is parsed only once.
example_parse = parsed

# Ids and types are stored as int64; cast them to float32 so they can be
# concatenated with the float32 xyz/dir columns.
roadgraph_id = tf.cast(parsed['roadgraph_samples/id'], tf.float32)
roadgraph_type = tf.cast(parsed['roadgraph_samples/type'], tf.float32)
# [20000, 8] rows laid out as xyz (3), dir (3), type (1), id (1).
roadmap_vector = tf.concat(
    [
        parsed['roadgraph_samples/xyz'],
        parsed['roadgraph_samples/dir'],
        roadgraph_type,
        roadgraph_id,
    ],
    axis=1)

In [4]:
def create_figure_and_axes(size_pixels):
  """Creates a new square, white-background figure holding a single axes.

  Args:
    size_pixels: Side length of the figure in pixels.

  Returns:
    A (fig, ax) tuple.
  """
  # Pixel size is expressed through matplotlib's inches * dpi convention.
  dpi = 100
  side_inches = size_pixels / dpi

  # A fresh UUID is used as the figure identifier on every call.
  fig, ax = plt.subplots(1, 1, num=uuid.uuid4())
  fig.set_size_inches([side_inches, side_inches])
  fig.set_dpi(dpi)
  fig.set_facecolor('white')
  fig.set_tight_layout(True)

  ax.set_facecolor('white')
  for axis_name, axis in (('x', ax.xaxis), ('y', ax.yaxis)):
    axis.label.set_color('black')
    ax.tick_params(axis=axis_name, colors='black')
  ax.grid(False)
  return fig, ax


def fig_canvas_image(fig):
  """Draws `fig` and returns its canvas as a [H, W, 3] uint8 np.array.

  The pixel data comes from fig.canvas.tostring_rgb().
  """
  # Keep only enough margin around the axes to show the x and y tick labels.
  fig.subplots_adjust(
      left=0.08, bottom=0.08, right=0.98, top=0.98, wspace=0.0, hspace=0.0)

  canvas = fig.canvas
  canvas.draw()
  rgb = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)

  # The canvas reports (width, height); image arrays are (height, width, 3).
  width, height = canvas.get_width_height()
  return rgb.reshape((height, width, 3))


def get_colormap(num_agents):
  """Returns a [num_agents, 4] array of 'jet' colors in shuffled row order."""
  jet = cm.get_cmap('jet', num_agents)
  rgba = jet(range(num_agents))
  # Shuffles the rows in place using numpy's global random state.
  np.random.shuffle(rgba)
  return rgba


def get_viewport(all_states, all_states_mask):
  """Computes the center and extent of the valid agent positions.

  Args:
    all_states: states of agents as an array of shape [num_agents, num_steps,
      2], with x in the last-axis slot 0 and y in slot 1.
    all_states_mask: binary mask of shape [num_agents, num_steps] selecting
      the entries of `all_states` to take into account.

  Returns:
    center_y: float. y coordinate midway between the smallest and largest y.
    center_x: float. x coordinate midway between the smallest and largest x.
    width: float. The larger of the y extent and the x extent.
  """
  selected = all_states[all_states_mask]
  xs = selected[..., 0]
  ys = selected[..., 1]

  center_x = (np.max(xs) + np.min(xs)) / 2
  center_y = (np.max(ys) + np.min(ys)) / 2

  # Peak-to-peak (max - min) extent along each axis.
  width = max(np.ptp(ys), np.ptp(xs))

  return center_y, center_x, width


def visualize_one_step(states,
                       mask,
                       roadgraph,
                       title,
                       center_y,
                       center_x,
                       width,
                       color_map,
                       size_pixels=1000):
  """Draws the roadgraph and the valid agents of one time step.

  Args:
    states: Array indexed as [agent, 0] for x and [agent, 1] for y.
    mask: Boolean array over agents; only entries that are True are drawn.
    roadgraph: Array whose first two columns are the x/y of roadgraph points.
    title: Title shown above the plot.
    center_y: y coordinate of the center of the view.
    center_x: x coordinate of the center of the view.
    width: Requested side length of the (square) view.
    color_map: Per-agent colors, indexed with `mask`.
    size_pixels: Side length of the output image in pixels.

  Returns:
    The rendered figure as returned by `fig_canvas_image`.
  """
  fig, ax = create_figure_and_axes(size_pixels=size_pixels)

  # Roadgraph points as small black dots.
  ax.plot(roadgraph[:, 0], roadgraph[:, 1], 'k.', alpha=1, ms=2)

  # Positions of the agents that are valid at this step.
  ax.scatter(
      states[mask, 0],
      states[mask, 1],
      marker='o',
      linewidths=3,
      color=color_map[mask],
  )

  ax.set_title(title)

  # Square view centered on (center_x, center_y): `width` on a side, but never
  # less than 10m.
  half_side = max(10, width * 1.0) / 2
  ax.axis([
      center_x - half_side, center_x + half_side, center_y - half_side,
      center_y + half_side
  ])
  ax.set_aspect('equal')

  # Rasterize, then close the figure.
  image = fig_canvas_image(fig)
  plt.close(fig)
  return image


def visualize_all_agents_smooth(
    decoded_example,
    size_pixels=1000,
):
  """Visualizes all agent trajectories as a series of images, one per step.

  Images are produced for every past step, then the current step, then every
  future step, all sharing one viewport and one agent color map.

  Args:
    decoded_example: Dictionary containing agent info about all modeled agents.
    size_pixels: The size in pixels of the output image.

  Returns:
    A list of T [H, W, 3] uint8 np.arrays of the drawn matplotlib figure
    canvas, where T = num_past_steps + 1 + num_future_steps.
  """

  def _xy(x_key, y_key):
    # Stacks the x and y features along a new last axis: [..., 2] float32.
    return tf.stack([decoded_example[x_key], decoded_example[y_key]],
                    -1).numpy()

  def _valid(key):
    return decoded_example[key].numpy() > 0.0

  # [num_agents, num_past_steps, 2] and its mask.
  past_states = _xy('state/past/x', 'state/past/y')
  past_states_mask = _valid('state/past/valid')

  # [num_agents, 1, 2] and its mask.
  current_states = _xy('state/current/x', 'state/current/y')
  current_states_mask = _valid('state/current/valid')

  # [num_agents, num_future_steps, 2] and its mask.
  future_states = _xy('state/future/x', 'state/future/y')
  future_states_mask = _valid('state/future/valid')

  # [num_points, 3] float32.
  roadgraph_xyz = decoded_example['roadgraph_samples/xyz'].numpy()

  num_agents, num_past_steps, _ = past_states.shape
  num_future_steps = future_states.shape[1]

  color_map = get_colormap(num_agents)

  # The viewport is computed over every valid state of the whole sequence:
  # [num_agents, num_past_steps + 1 + num_future_steps, ...].
  all_states = np.concatenate([past_states, current_states, future_states], 1)
  all_states_mask = np.concatenate(
      [past_states_mask, current_states_mask, future_states_mask], 1)
  center_y, center_x, width = get_viewport(all_states, all_states_mask)

  def _render(step_states, step_mask, title):
    # Each argument still carries a length-1 step axis; drop it.
    return visualize_one_step(step_states[:, 0], step_mask[:, 0],
                              roadgraph_xyz, title, center_y, center_x, width,
                              color_map, size_pixels)

  images = []

  # Past steps, titled by the number of steps before the current one.
  past_frames = zip(
      np.split(past_states, num_past_steps, 1),
      np.split(past_states_mask, num_past_steps, 1))
  for steps_back, (step_states, step_mask) in zip(
      range(num_past_steps, 0, -1), past_frames):
    images.append(_render(step_states, step_mask, 'past: %d' % steps_back))

  # Current step.
  images.append(_render(current_states, current_states_mask, 'current'))

  # Future steps, titled 1..num_future_steps.
  future_frames = zip(
      np.split(future_states, num_future_steps, 1),
      np.split(future_states_mask, num_future_steps, 1))
  for steps_ahead, (step_states, step_mask) in enumerate(future_frames, 1):
    images.append(_render(step_states, step_mask, 'future: %d' % steps_ahead))

  return images


# Render one image per time step (past, current, future) of the parsed example.
images = visualize_all_agents_smooth(parsed)

In [5]:
def create_animation(images):
  """Creates a Matplotlib animation that steps through all the given images.

  Args:
    images: A list of numpy arrays representing the images.

  Returns:
    A matplotlib.animation.Animation with one frame per image, shown at
    100 ms intervals.

  Usage:
    anim = create_animation(images)
    anim.save('/tmp/animation.avi')
    HTML(anim.to_html5_video())
  """

  # Interactive mode is switched off while the figure is created (presumably
  # so the empty figure is not displayed) and switched back on afterwards.
  plt.ioff()
  fig, ax = plt.subplots()
  dpi = 100
  size_inches = 1000 / dpi
  fig.set_size_inches([size_inches, size_inches])
  plt.ion()

  def animate_func(i):
    # Remove the previous frame first; otherwise every call stacks another
    # image artist on the axes and each redraw gets slower.
    ax.clear()
    ax.imshow(images[i])
    ax.set_xticks([])
    ax.set_yticks([])
    # Must be the boolean False: the string 'off' is truthy and would enable
    # the grid.
    ax.grid(False)

  # One frame per image, so the whole sequence is played.
  anim = animation.FuncAnimation(
      fig, animate_func, frames=len(images), interval=100)
  plt.close(fig)
  return anim


# Animate every 5th rendered step and embed the result as an HTML5 video.
anim = create_animation(images[::5])
HTML(anim.to_html5_video())

In [6]:
# Roadgraph samples as rows of xyz (3), dir (3), type (1), id (1); ids and
# types are cast to float32 so that all columns share one dtype.
roadgraph_id = tf.cast(parsed['roadgraph_samples/id'], tf.float32)
roadgraph_type = tf.cast(parsed['roadgraph_samples/type'], tf.float32)
roadmap_vector = tf.concat([parsed['roadgraph_samples/xyz'],parsed['roadgraph_samples/dir'],
                            roadgraph_type,roadgraph_id],axis=1)

#Finding all polylines of roadgraph
# Convert to numpy once and select rows with a boolean mask per id, instead of
# copying rows one by one out of the TF tensor.
roadmap_rows = roadmap_vector.numpy()
sample_ids = parsed['roadgraph_samples/id'].numpy()[:, 0]
max_id = int(sample_ids.max())

# One [num_points, 8] array per id in 1..max_id (empty when an id is unused).
polyline_collection = [
    roadmap_rows[sample_ids == polyline_id]
    for polyline_id in range(1, max_id + 1)
]
# Length of the longest polyline; 0 when there is none.
max_size = max((len(polyline) for polyline in polyline_collection), default=0)
print("The given example contains {} number of unique map vectors".format(len(polyline_collection)))
print("Largest size of vector in roadmap polyline is {}".format(max_size))

# Pad every polyline to `max_size` rows; padding rows are filled with -1e6.
roadmap_polyline = np.full(
    (len(polyline_collection), max_size, roadmap_rows.shape[1]),
    -1e6,
    dtype=np.float32)
for padded, polyline in zip(roadmap_polyline, polyline_collection):
    padded[:len(polyline)] = polyline
print(roadmap_polyline.shape[1])
roadmap_polyline = torch.tensor(roadmap_polyline)
print(roadmap_polyline.shape)

The given example contains 349 number of unique map vectors
Largest size of vector in roadmap polyline is 333
333
torch.Size([349, 333, 8])


In [7]:
#Finding polylines of agents
#Shifting origin to centre of line joining agents of interest
index = tf.where(parsed['state/objects_of_interest'])
interest_rows = index.numpy()[:, 0]
if len(interest_rows) < 2:
    raise ValueError(
        'Expected at least two objects of interest, found %d' % len(interest_rows))
# The origin is the midpoint between the first two flagged agents.
first, second = interest_rows[:2]

# [num_agents, num_past_steps] and [num_agents, 1] float32 positions.
past_X = parsed['state/past/x'].numpy()
past_Y = parsed['state/past/y'].numpy()
past_Z = parsed['state/past/z'].numpy()
current_X = parsed['state/current/x'].numpy()
current_Y = parsed['state/current/y'].numpy()
current_Z = parsed['state/current/z'].numpy()

origin_X = 0.5 * float(current_X[first, 0] + current_X[second, 0])
origin_Y = 0.5 * float(current_Y[second, 0] + current_Y[first, 0])
origin_Z = 0.5 * float(current_Z[second, 0] + current_Z[first, 0])
origin = [origin_X, origin_Y, origin_Z]
print(origin)

# Shift every position so that it is relative to the new origin.
past_X = past_X - origin_X
past_Y = past_Y - origin_Y
past_Z = past_Z - origin_Z
current_X = current_X - origin_X
current_Y = current_Y - origin_Y
current_Z = current_Z - origin_Z

# Positions of invalid states are replaced by -1 (after the shift).
past_invalid = parsed['state/past/valid'].numpy() == 0
current_invalid = parsed['state/current/valid'].numpy() == 0
past_X[past_invalid] = -1
past_Y[past_invalid] = -1
past_Z[past_invalid] = -1
current_X[current_invalid] = -1
current_Y[current_invalid] = -1
current_Z[current_invalid] = -1

# Each agent polyline has one row per past step, laid out as
#   [start xyz, end xyz, timestamp, type, id].
# Row j starts at past step j and ends at past step j + 1; the last row ends at
# the current position.
past_xyz = np.stack([past_X, past_Y, past_Z], axis=-1)
current_xyz = np.stack([current_X, current_Y, current_Z], axis=-1)
segment_end = np.concatenate([past_xyz[:, 1:], current_xyz], axis=1)
# Rows use the timestamp of their starting past step, except the last row,
# which uses the current timestamp.
segment_time = np.concatenate(
    [
        parsed['state/past/timestamp_micros'].numpy()[:, :-1],
        parsed['state/current/timestamp_micros'].numpy(),
    ],
    axis=1)
# Type and id are constant per agent; repeat them on every row.
type_and_id = np.stack(
    [parsed['state/type'].numpy(), parsed['state/id'].numpy()], axis=-1)
type_and_id = np.broadcast_to(
    type_and_id[:, None, :], past_xyz.shape[:2] + (2,))
all_polylines = np.concatenate(
    [past_xyz, segment_end, segment_time[..., None], type_and_id],
    axis=-1).astype(np.float32)

# Keep only the agents whose id is non-negative.
valid_states = tf.where(parsed['state/id']>=0).numpy()
agent_polyline = tf.convert_to_tensor(all_polylines[valid_states[:, 0]])
print(agent_polyline.shape)

[-3454.5576171875, 3208.935546875, 58.092308044433594]
(128, 10, 9)


In [8]:
import torch.nn as nn 
import torch.nn.functional as F
print(roadmap_polyline.shape)

torch.Size([349, 333, 8])


In [9]:
class MLP(nn.Module):
    """Linear -> LayerNorm -> ReLU layer followed by a max-pool and concat.

    The pooled maximum over dim 1 is appended to every row of dim 1, so the
    output's last dimension is twice `hidden`.
    """

    def __init__(self, size3, hidden):
        """
        Args:
            size3: Size of the last dimension of the input.
            hidden: Number of output features of the linear layer.
        """
        super().__init__()
        self.size3 = size3
        self.hidden = hidden
        self.linear = nn.Linear(size3, hidden)
        self.norm = nn.LayerNorm([hidden])

    def forward(self, input_var, size1, size2):
        """Encodes `input_var` and appends the max-pooled encoding to each row.

        Args:
            input_var: Input tensor with last dimension `size3`; assumed to be
                laid out as [size1, size2, size3] — TODO confirm with callers.
            size1: Not used by this method.
            size2: Pooling window along dim 1, also the repeat count used to
                tile the pooled result back along dim 1.

        Returns:
            Concatenation, along the last dimension, of the encoded input and
            the tiled pooled encoding.
        """
        encoded = F.relu(self.norm(self.linear(input_var)))
        pooled = F.max_pool2d(encoded, (size2, 1), stride=1)
        tiled = pooled.repeat(1, size2, 1)
        return torch.cat([encoded, tiled], dim=-1)

In [10]:
class SubGraph(nn.Module):
    """Three stacked MLP layers followed by a max-pool over dim 1."""

    def __init__(self, size3):
        """
        Args:
            size3: Size of the last dimension of the input.
        """
        super().__init__()
        self.size3 = size3
        hidden = 64
        # Each MLP outputs 2 * hidden features, which is what the next one
        # takes as input.
        self.MLP1 = MLP(size3, hidden)
        self.MLP2 = MLP(2 * hidden, hidden)
        self.MLP3 = MLP(2 * hidden, hidden)

    def forward(self, input_var, size1, size2):
        """Runs the three layers in order and pools the result.

        Args:
            input_var: Input tensor with last dimension `size3`.
            size1: Forwarded unchanged to every MLP layer.
            size2: Forwarded to every MLP layer and used as the pooling window
                along dim 1.

        Returns:
            The output of the last layer, max-pooled with a (size2, 1) window.
        """
        features = input_var
        for layer in (self.MLP1, self.MLP2, self.MLP3):
            features = layer(features, size1, size2)
        return F.max_pool2d(features, (size2, 1), stride=1)

In [11]:
# Encode the agent polylines and the roadmap polylines with two separate
# SubGraph instances, each sized to the last dimension of its input.
agent_subgraph = torch.tensor(agent_polyline.numpy())
roadmap_subgraph = roadmap_polyline
subgraph1 = SubGraph(agent_subgraph.shape[2])
subgraph2 = SubGraph(roadmap_subgraph.shape[2])
# The names are rebound to the SubGraph outputs; shape[0] and shape[1] of each
# input are passed as size1 and size2.
agent_subgraph = subgraph1(agent_subgraph,agent_subgraph.shape[0],agent_subgraph.shape[1])
roadmap_subgraph = subgraph2(roadmap_subgraph,roadmap_subgraph.shape[0],roadmap_subgraph.shape[1])

In [12]:
print(agent_subgraph.shape)

torch.Size([128, 1, 128])


In [13]:
print(agent_subgraph)

tensor([[[0.3911, 0.7085, 0.0000,  ..., 0.0000, 0.8487, 0.0000]],

        [[0.3700, 0.6468, 0.0000,  ..., 0.0000, 0.9553, 0.0000]],

        [[0.3443, 0.6586, 0.0000,  ..., 0.0000, 0.9931, 0.0000]],

        ...,

        [[0.7368, 0.3435, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.7367, 0.3439, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.7371, 0.3430, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]],
       grad_fn=<MaxPool2DWithIndicesBackward>)


In [14]:
print(roadmap_subgraph)

tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.3885, 0.7706]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.3843, 0.7733]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.3819, 0.7746]],

        ...,

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.3427, 0.6973]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.3429, 0.6973]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.3432, 0.6972]]],
       grad_fn=<MaxPool2DWithIndicesBackward>)


In [15]:
print(roadmap_subgraph.shape)
# Stack the agent features and the roadmap features along dim 0.
graph_input = torch.cat([agent_subgraph,roadmap_subgraph],axis = 0)
print(graph_input.shape)

torch.Size([349, 1, 128])
torch.Size([477, 1, 128])


In [16]:
class GNN(nn.Module):
    """Single-head dot-product attention layer with 64-dim projections.

    Attention is computed between the rows of dim 1, independently for every
    index of dim 0, and the attended values are then summed over dim 1.

    NOTE(review): when dim 1 has length 1 (the notebook prints a
    [477, 1, 128] input), the softmax is taken over a single score, so the
    output is just the value projection and no information flows between the
    entries of dim 0 — confirm this is intended.
    """

    def __init__(self, size):
        """
        Args:
            size: Size of the last dimension of the input.
        """
        super().__init__()
        self.key_weights = nn.Linear(size, 64)
        self.value_weights = nn.Linear(size, 64)
        self.query_weights = nn.Linear(size, 64)

    def forward(self, input_var):
        """Applies attention to `input_var` and sums the result over dim 1.

        Args:
            input_var: Tensor whose last dimension is `size`; dims 1 and 2 of
                the projected keys are swapped for the score product.

        Returns:
            The attention-weighted values summed over dim 1.  The shape of the
            result is also printed.
        """
        keys = self.key_weights(input_var)
        values = self.value_weights(input_var)
        query = self.query_weights(input_var)

        # Unscaled dot-product scores, normalized over the last dimension.
        scores = query @ keys.transpose(1, 2)
        weights = torch.softmax(scores, dim=-1)
        output = (weights @ values).sum(dim=1)
        print(output.shape)
        return output

In [17]:
# Build the attention layer for the feature size of `graph_input` and apply it
# (the forward pass prints the output shape).
GNN_graph = GNN(graph_input.shape[-1])
graph_output = GNN_graph(graph_input)

torch.Size([477, 64])


In [18]:
print(graph_output)

tensor([[-0.0856,  0.1577,  0.0113,  ..., -0.6824,  0.0786, -0.2504],
        [-0.0953,  0.1585, -0.0100,  ..., -0.6452,  0.0818, -0.2246],
        [-0.1560,  0.1106, -0.0016,  ..., -0.6857,  0.0577, -0.3043],
        ...,
        [-0.0602,  0.4229, -0.1116,  ..., -0.4912, -0.6375, -0.5854],
        [-0.0601,  0.4234, -0.1115,  ..., -0.4909, -0.6373, -0.5854],
        [-0.0602,  0.4234, -0.1115,  ..., -0.4909, -0.6372, -0.5852]],
       grad_fn=<SumBackward1>)


Vehicles of interest are identified using parsed['state/objects_of_interest']. We can find the indices of the objects of interest with tf.where(parsed['state/objects_of_interest']==1), and then look up any detail of these objects by indexing the desired feature with those indices. Example: the past x coordinates of these agents are parsed['state/past/x'][1] and parsed['state/past/x'][4].

Change of coordinate frames
1. Let the new origin be at position $r_1 = (x_1, y_1, z_1)$, expressed in the current coordinate frame. The coordinates of an agent relative to the new origin are then $r = r_2 - r_1$, where $r_2$ is the agent's position in the current coordinate frame.
2. If we also decide to rotate the coordinate frame by some angle, the new position of an agent is given by

$$\begin{bmatrix} r \\ 1 \end{bmatrix} = T \begin{bmatrix} r_2 \\ 1 \end{bmatrix}, \qquad T = \begin{bmatrix} r_{11} & r_{12} & r_{13} & p_x \\ r_{21} & r_{22} & r_{23} & p_y \\ r_{31} & r_{32} & r_{33} & p_z \\ 0 & 0 & 0 & 1 \end{bmatrix}$$

where $T$ is the transformation matrix between the frames.
      

In [19]:
print(parsed['state/objects_of_interest'])

tf.Tensor(
[0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], shape=(128,), dtype=int64)
