Use this IPython notebook on [Google Colaboratory](https://colab.research.google.com) to reproduce the video here:

> SSD Tensorflow based car detection and tracking demo for OSSDC.org VisionBasedACC PS3/PS4 simulator

> https://youtu.be/dqnjHqwP68Y

Code here:

> https://github.com/OSSDC/OSSDC-VisionBasedACC

Make sure you enable GPU in Colaboratory using menu path Runtime -> Change runtime type

Join our efforts in [Open Source Self Driving Car Initiative](http://OSSDC.org)!



In [0]:
!apt-get -qq install -y libsm6 libxext6 && pip install -q -U opencv-python && pip install -q -U pafy && pip install -q -U imtools

In [0]:
import os
import math
#import random

import numpy as np
import tensorflow as tf
import cv2
import pafy

#from imutils.video import WebcamVideoStream

slim = tf.contrib.slim

%pylab inline 
from IPython.display import clear_output

In [0]:
#%matplotlib inline
#import matplotlib.pyplot as plt
#import matplotlib.image as mpimg
#from skimage import io
import time
import subprocess

precision = 10
from datetime import datetime

def getCurrentClock():
    #return time.clock()
    return datetime.now()


In [0]:
import sys
!rm master.zip
!wget https://github.com/OSSDC/OSSDC-VisionBasedACC/archive/master.zip
!unzip master.zip

sys.path.append('.')
import os
#os.listdir(path='.')

In [0]:
!cp -r OSSDC-VisionBasedACC-master/object_detection/* .

In [0]:
#os.listdir(path='.')

In [0]:
!cd checkpoints; rm *
!cd checkpoints; wget https://github.com/OSSDC/OSSDC-VisionBasedACC/raw/master/object_detection/checkpoints/ssd_300_vgg.ckpt.data-00000-of-00001

In [0]:
!cd checkpoints; wget https://github.com/OSSDC/OSSDC-VisionBasedACC/raw/master/object_detection/checkpoints/ssd_300_vgg.ckpt.index

In [0]:
!ls -ls checkpoints

from nets import ssd_vgg_300, ssd_common, np_methods

In [0]:
from preprocessing import ssd_vgg_preprocessing
import visualization

In [0]:
# TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

## SSD 300 Model

The SSD 300 network takes 300x300 image inputs. In order to feed any image, the latter is resize to this input shape (i.e.`Resize.WARP_RESIZE`). Note that even though it may change the ratio width / height, the SSD model performs well on resized images (and it is the default behaviour in the original Caffe implementation).

SSD anchors correspond to the default bounding boxes encoded in the network. The SSD net output provides offset on the coordinates and dimensions of these anchors.

In [0]:
# Input placeholder.
net_shape = (300, 300)
data_format = 'NCHW'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore SSD model.
ckpt_filename = './checkpoints/ssd_300_vgg.ckpt'
# ckpt_filename = './checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)

INFO:tensorflow:Restoring parameters from ./checkpoints/ssd_300_vgg.ckpt


INFO:tensorflow:Restoring parameters from ./checkpoints/ssd_300_vgg.ckpt


## Post-processing pipeline

The SSD outputs need to be post-processed to provide proper detections. Namely, we follow these common steps:

* Select boxes above a classification threshold;
* Clip boxes to the image shape;
* Apply the Non-Maximum-Selection algorithm: fuse together boxes whose Jaccard score > threshold;
* If necessary, resize bounding boxes to original image shape.

In [0]:
# Main image processing routine.
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run([image_4d, predictions, localisations, bbox_img],
                                                              feed_dict={img_input: img})
    
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)
    
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes

In [0]:
!rm video-test.mp4

In [0]:
import time

start_time = time.time()

#A smooth drive in The Crew on PS4 - OSSDC Simulator ACC Train 30fps
url = 'https://www.youtube.com/watch?v=uuQlMCMT71I'

#Bad weather sample
#url = "https://www.youtube.com/watch?v=q3q26xrigX4"

#url= ... #put your Youtube video URL here and uncomment the line
def getVideoURL(url):
    videoUrl = url
    video = pafy.new(url)
    streams = video.streams
    videoUrlList={}
    for s in streams:
        videoUrlList[s.resolution] = s.url
        #print(s.resolution, s.extension, s.get_filesize(), s.url)

    if videoUrlList.get("1280x720",None) is not None:
        videoUrl = videoUrlList.get("1280x720",None)
        print("1280x720")

    if videoUrlList.get("1920x1080",None) is not None:
        videoUrl = videoUrlList.get("1920x1080",None)
        print("1920x1080")
    return videoUrl

origVideoUrl = url

if "youtube." in url: 
    videoUrl = getVideoURL(url)
    !wget -q -O video-test.mp4 '$videoUrl'
else:
    videoUrl = url

print("videoUrl =",videoUrl)
videoUrl='video-test.mp4'

In [0]:
webcam=False
#webcam=True

if webcam:
    cap = WebcamVideoStream(videoUrl).start()
else:
    cap = cv2.VideoCapture(videoUrl)

count=50
skip=0
SKIP_EVERY=150 #pick a frame every 5 seconds

count=1000 #look only at first 1000 frames
skip=0
SKIP_EVERY=0

every=SKIP_EVERY
initial_time = getCurrentClock()
flag=True

frameCnt=0
prevFrameCnt=0
prevTime=getCurrentClock()

showImage=False
showImage=True

processImage=False
processImage=True

zoomImage=0
rclasses = []
rscores = []
rbboxes = []

record = False
#record = True #uncomment line to get a video detections marked in place

procWidth = 1280 #640   # processing width (x resolution) of frame
procHeight = 720   # processing width (x resolution) of frame

out = None
if record:
    fourcc = cv2.VideoWriter_fourcc(*'MPEG')
    timestr = time.strftime("%Y%m%d-%H%M%S")
    out = cv2.VideoWriter('output-'+timestr+'.mp4',fourcc, 30.0, (int(procWidth),int(procHeight)))
frame = cap.read()
print("Start procesing!",frame)
try:
    while True:
        if webcam or cap.grab():
            if webcam:
                frame = cap.read()
            else:
                flag, frame = cap.retrieve()    
            if not flag:
                continue
            else:
                #print("frameCnt:",frameCnt)
                frameCnt=frameCnt+1
                nowMicro = getCurrentClock()
                delta = (nowMicro-prevTime).total_seconds()
                #print("%f " % (delta))
                if delta>=1.0:
                    #print("FPS = %0.4f" % ((frameCnt-prevFrameCnt)/delta))
                    prevTime = nowMicro
                    prevFrameCnt=frameCnt

                if skip>0:
                    skip=skip-1
                    continue

                if every>0:
                    every=every-1
                    continue
                every=SKIP_EVERY

                count=count-1
                if count==0:
                    break

                img = frame
                if processImage:    
                    if zoomImage>0:
                        #crop center of image, crop width is output_side_length
                        output_side_length = int(procWidth/zoomImage)
                        height, width, depth = frame.shape
                        #print (height, width, depth)
                        height_offset = int((height - output_side_length) / 2)
                        width_offset = int((width - output_side_length) / 2)
                        #print (height, width, depth, height_offset,width_offset,output_side_length)
                        img = frame[height_offset:height_offset + output_side_length,width_offset:width_offset + output_side_length]

                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    start_time = getCurrentClock()
                    rclasses, rscores, rbboxes =  process_image(img)
                    if len(rclasses)>0:
                        nowMicro = getCurrentClock()
                        print("# %s - %s - %0.4f seconds ---" % (frameCnt,rclasses.astype('|S3'), (nowMicro - start_time).total_seconds()))
                        start_time = nowMicro
                    
                    #img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                    visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
                if showImage:
                    imshow(img)
                    show()
                    # Display the frame until new frame is available
                    clear_output(wait=True)
                if record:
                    if processImage:
                        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                    newimage = cv2.resize(img,(procWidth,procHeight))
                    out.write(newimage)
except KeyboardInterrupt:
    # Release the Video Device
    vid.release()
    # Message to be displayed after releasing the device
    print ("Released Video Resource")
    
nowMicro = getCurrentClock()
print("# %s -- %0.4f seconds - FPS: %0.4f ---" % (frameCnt, (nowMicro - initial_time).total_seconds(), frameCnt/(nowMicro - initial_time).total_seconds()))
cap.release()
if record:
    out.release()

NameError: ignored

In [0]:
!ls -la
!pwd

total 291728
drwxr-xr-x 1 root root      4096 Jan 20 18:40 .
drwxr-xr-x 1 root root      4096 Jan 20 12:44 ..
drwx------ 4 root root      4096 Jan 20 12:46 .cache
drwxr-xr-x 2 root root      4096 Jan 20 12:49 checkpoints
-rw-r--r-- 1 root root      6701 Jan 20 12:48 common.py
drwxr-xr-x 3 root root      4096 Jan 20 12:46 .config
-rw-r--r-- 1 root root      2053 Jan 20 12:48 dac-viewer.py
drwxr-xr-x 2 root root      4096 Jan 20 12:48 data
drwxr-xr-x 1 root root      4096 Jan 20 13:14 datalab
drwxr-xr-x 4 root root      4096 Jan 20 12:45 .forever
-rw-r--r-- 1 root root       821 Jan 20 12:48 FPS.py
drwxr-xr-x 5 root root      4096 Jan 20 12:46 .ipython
drwxr-xr-x 2 root root      4096 Jan 20 12:47 .keras
-rw-r--r-- 1 root root      4628 Jan 20 12:48 lk_track-crop.py
drwx------ 3 root root      4096 Jan 20 12:45 .local
-rw-r--r-- 1 root root   1491285 Jan 20 18:31 master.zip
-rw-r--r-- 1 root root   1491285 Jan 20 12:48 master.zip.1
drwxr-xr-x 2 root root      4096 Jan 2