<a href="https://colab.research.google.com/github/RedwanNewaz/3DPedestrainDetection/blob/master/3D_Object_Waymo_Open_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Waymo Open Dataset Tutorial

- Website: https://waymo.com/open
- GitHub: https://github.com/waymo-research/waymo-open-dataset

This tutorial demonstrates how to use the Waymo Open Dataset with two frames of data. Visit the [Waymo Open Dataset Website](https://waymo.com/open) to download the full dataset.

To use, open this notebook in [Colab](https://colab.research.google.com).

Uncheck the box "Reset all runtimes before running" if you run this colab directly from the remote kernel. Alternatively, you can make a copy before trying to run it by following "File > Save copy in Drive ...".



## Install waymo_open_dataset package

In [None]:
!rm -rf waymo-od > /dev/null
!git clone https://github.com/waymo-research/waymo-open-dataset.git waymo-od
!cd waymo-od && git branch -a
!cd waymo-od && git checkout remotes/origin/r1.0
!pip3 install --upgrade pip

In [None]:
# Install the Waymo Open Dataset pip package (tf-2-6-0 variant); it provides the
# `waymo_open_dataset` modules imported below.
!pip3 install waymo-open-dataset-tf-2-6-0
import os
import tensorflow as tf
import math
import numpy as np
import itertools

# tf.enable_eager_execution()

from waymo_open_dataset.utils import range_image_utils
from waymo_open_dataset.utils import transform_utils
from waymo_open_dataset.utils import  frame_utils
from waymo_open_dataset import dataset_pb2 as open_dataset

## Read one frame

Each file in the dataset is a sequence of frames ordered by frame start timestamps. We have extracted two frames from the dataset to demonstrate the dataset format.

In [None]:

class FusedSensors:
    """Wrapper around one frame that exposes its lidar points and camera images.

    The range images are parsed once in ``__init__``; the accessor methods
    convert them to point clouds or decode camera images on demand.
    """

    # Position of the front camera in the name-sorted image list.
    # NOTE(review): this assumes the front camera has the smallest `name`
    # value among `frame.images` -- confirm against the dataset's camera enum.
    _FRONT_CAM_INDEX = 0

    def __init__(self, frame):
        """Parse the range images and camera projections of ``frame``.

        Args:
            frame: object accepted by
                ``frame_utils.parse_range_image_and_camera_projection``; it
                must also expose an ``images`` sequence whose items have
                ``name`` and ``image`` attributes.
        """
        self.frame = frame
        (self.range_images,
         self.camera_projections,
         self.seg_labels,
         self.range_image_top_pose) = (
            frame_utils.parse_range_image_and_camera_projection(frame))

    def _point_cloud(self, ri_index=None):
        """Convert the parsed range images to one concatenated point cloud.

        Args:
            ri_index: forwarded to
                ``frame_utils.convert_range_image_to_point_cloud`` when given;
                when ``None`` the library's own default is used.

        Returns:
            Tuple ``(points, cp_points)``: the per-lidar arrays returned by the
            library, concatenated along axis 0.
        """
        extra = {} if ri_index is None else {"ri_index": ri_index}
        points, cp_points = frame_utils.convert_range_image_to_point_cloud(
            self.frame,
            self.range_images,
            self.camera_projections,
            self.range_image_top_pose,
            **extra)
        return np.concatenate(points, axis=0), np.concatenate(cp_points, axis=0)

    def extract_lidar_data(self):
        """Return ``(points_all, cp_points_all)`` using the library's default return index."""
        # 3d points in vehicle frame.
        return self._point_cloud()

    def extract_ri2_data(self):
        """Return ``(points_all_ri2, cp_points_all_ri2)`` for ``ri_index=1``."""
        return self._point_cloud(ri_index=1)

    def __extract_images(self):
        """Return the frame's camera images sorted by their ``name`` attribute."""
        return sorted(self.frame.images, key=lambda i: i.name)

    def get_front_image(self):
        """Decode the front camera's JPEG and return it as a NumPy array."""
        raw_img = self.__extract_images()[self._FRONT_CAM_INDEX].image
        return tf.image.decode_jpeg(raw_img).numpy()

    def projected_points(self):
        """Return the lidar points that project into the front camera.

        Returns:
            NumPy array with one row per selected point: columns 1 and 2 of
            the camera-projection data (presumably pixel x and y -- confirm
            against the Waymo API) followed by the point's Euclidean distance
            from the vehicle-frame origin.
        """
        points_all, cp_points_all = self.extract_lidar_data()

        # The distance between lidar points and vehicle frame origin.
        ranges = tf.norm(points_all, axis=-1, keepdims=True)
        cp_tensor = tf.constant(cp_points_all, dtype=tf.int32)

        # Keep only the points whose first projection column matches the front
        # camera's name; the row indices are computed once and reused.
        front_image = self.__extract_images()[self._FRONT_CAM_INDEX]
        selected = tf.where(tf.equal(cp_tensor[..., 0], front_image.name))

        cp_front = tf.cast(tf.gather_nd(cp_tensor, selected), dtype=tf.float32)
        ranges_front = tf.gather_nd(ranges, selected)

        return tf.concat([cp_front[..., 1:3], ranges_front], axis=-1).numpy()

In [None]:
# Location of the sample frames shipped with the cloned waymo-od repository.
FILENAME = '/content/waymo-od/tutorial/frames'
dataset = tf.data.TFRecordDataset(FILENAME, compression_type='')

# Only the first record is processed; its lidar points, front image and
# projected points are kept as notebook-level variables for the cells below.
for record in dataset.take(1):
    frame = open_dataset.Frame()
    frame.ParseFromString(bytearray(record.numpy()))

    data = FusedSensors(frame)
    pcl, cp_pcl = data.extract_lidar_data()
    image = data.get_front_image()
    projected_pcl = data.projected_points()

    print(pcl.shape)

## Compute maximum and minimum ranges


In [None]:
# Column 2 of the projected cloud holds the per-point distance computed in
# FusedSensors.projected_points.
projected_depths = projected_pcl[:, 2]
# NOTE(review): this summarises only column 0 of the raw cloud (presumably the
# x coordinate in the vehicle frame), not a Euclidean range -- confirm intent.
raw_first_axis = pcl[:, 0]

print('projected pcl max depth = ', projected_depths.max(), " min depth = ", projected_depths.min())
print('raw pcl max depth = ', raw_first_axis.max(), " min depth = ", raw_first_axis.min())

## Object Detection

In [None]:
# Check nvcc version
!nvcc -V
# Check GCC version
!gcc --version

In [None]:
# install dependencies: (use cu111 because colab has CUDA 11.1)
!pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html

# install mmcv-full thus we could use CUDA operators
!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html

# Install mmdetection
!rm -rf mmdetection
!git clone https://github.com/open-mmlab/mmdetection.git
%cd mmdetection

!pip install -e .

In [None]:
# We download the pre-trained checkpoints for inference and finetuning.
!mkdir checkpoints
!wget -c https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth \
      -O checkpoints/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth

In [None]:
import mmcv
from mmcv.runner import load_checkpoint

from mmdet.apis import inference_detector, show_result_pyplot
from mmdet.models import build_detector

# Detector config and the matching pre-trained checkpoint (paths are relative
# to the mmdetection checkout, which is the current working directory).
CONFIG_FILE = 'configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py'
CHECKPOINT_FILE = 'checkpoints/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth'

# Inference runs on the CPU. Change this to 'cuda:0' to run on the GPU; the
# value is used both when loading the weights and when placing the model.
device = 'cpu'

# Load the config.
config = mmcv.Config.fromfile(CONFIG_FILE)
# Set pretrained to None: the full set of weights comes from the checkpoint,
# so no separate backbone download is needed.
config.model.pretrained = None

# Initialize the detector.
model = build_detector(config.model)

# Load the weights; `map_location` makes the tensors land on `device`
# regardless of where they were saved from.
checkpoint = load_checkpoint(model, CHECKPOINT_FILE, map_location=device)

# Set the class names used when visualising results.
model.CLASSES = checkpoint['meta']['CLASSES']

# The inference helpers read the config from the model.
model.cfg = config

# Place the model on the chosen device and switch it to evaluation mode.
model.to(device)
model.eval()

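## Convert the projected point cloud to a dictionary

Each projected lidar point is stored under its `(x, y)` image coordinate so that the depth at a given pixel can be looked up directly.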

In [None]:
# Key each projected point by its first two columns and store the third column
# as the value; when several points share a key, the last one wins.
projected_pcl_dict = {(u, v): depth for u, v, depth in projected_pcl}

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt 
from matplotlib.patches import Polygon
import scipy.stats as st

img = image.copy()
# plt.imshow(img)
# ax = plt.gca()
H, W, _ = img.shape
result = inference_detector(model, img)
for i, temp in enumerate(np.squeeze(result)):
  for bbox in temp:
    conf = bbox[-1]
    if conf < 0.5: continue 
    bbox_int = bbox.astype(np.int32)
    patch = img[bbox_int[1]:bbox_int[3], bbox_int[0]:bbox_int[2], :]

    depths = []
    for y in range(bbox_int[1], bbox_int[3]):
      for x in range(bbox_int[0], bbox_int[2]):
        d = projected_pcl_dict.get((x, y))
        if d is not None:
          depths.append(d)
    
    if(depths):
      # print(np.mean(depths), np.std(depths))
      X = int(np.mean(depths))
      std = np.sqrt(np.std(depths))
      z = (X - np.mean(depths)) /  std
      print(f"dist = {X} conf = {1.0 - st.norm.cdf(z):.4f} std = {std : .4f}, samples = {len(depths)}")

    
      


    plt.imshow(patch)
    plt.show()


    # poly = [[bbox_int[0], bbox_int[1]], [bbox_int[0], bbox_int[3]],
    #         [bbox_int[2], bbox_int[3]], [bbox_int[2], bbox_int[1]]]
    # np_poly = np.array(poly).reshape((4, 2)) 
    # ax.add_patch(Polygon(np_poly))
    # print(poly)
    # print(patch.shape, conf)
    

show_result_pyplot(model, img, result, score_thr=0.3)