From 26fa7eea5155ac3989476936628e62be3d773b95 Mon Sep 17 00:00:00 2001 From: Marius Slavescu Date: Thu, 20 Apr 2017 08:18:09 -0400 Subject: [PATCH] SSD Tensorflow scripts for OSSDC.org Vision Based Adaptive cruise control, later this code will be moved here: https://github.com/OSSDC/OSSDC-VisionBasedACC A video with the results is here: https://www.youtube.com/watch?v=dqnjHqwP68Y Run this script (change the URL insde to point to other YouTube video): cd SSD-Tensorflow/notebooks python3 ossdc-vbacc-ssd_notebook_cpu.py to see the SSD detection results against this video: #A smooth drive in The Crew on PS4 - OSSDC Simulator ACC Train 30fps https://www.youtube.com/watch?v=uuQlMCMT71I Use the IPython Notebook in Jupyter, against same video, with inline live image result, all in browser: ossdc-vbacc-ssd_notebook.ipynb These scripts are using GPU by default it will need some customizations for CPU. To test it directly on Youtube videos download youtube-ld (used above in both scripts) from: https://rg3.github.io/youtube-dl/download.html --- notebooks/ossdc-vbacc-ssd_notebook.ipynb | 460 ++++++++++++++++++++++ notebooks/ossdc-vbacc-ssd_notebook_cpu.py | 314 +++++++++++++++ 2 files changed, 774 insertions(+) create mode 100644 notebooks/ossdc-vbacc-ssd_notebook.ipynb create mode 100644 notebooks/ossdc-vbacc-ssd_notebook_cpu.py diff --git a/notebooks/ossdc-vbacc-ssd_notebook.ipynb b/notebooks/ossdc-vbacc-ssd_notebook.ipynb new file mode 100644 index 00000000..03dcdbd0 --- /dev/null +++ b/notebooks/ossdc-vbacc-ssd_notebook.ipynb @@ -0,0 +1,460 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Populating the interactive namespace from numpy and matplotlib\n" + ] + } + ], + "source": [ + "import os\n", + "import math\n", + "#import random\n", + "\n", + "import numpy as np\n", + "import tensorflow 
as tf\n", + "import cv2\n", + "import os\n", + "\n", + "from imutils.video import WebcamVideoStream\n", + "\n", + "slim = tf.contrib.slim\n", + "\n", + "%pylab inline \n", + "from IPython.display import clear_output" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "#%matplotlib inline\n", + "#import matplotlib.pyplot as plt\n", + "#import matplotlib.image as mpimg\n", + "from skimage import io\n", + "import time\n", + "import subprocess\n", + "\n", + "precision = 10\n", + "from datetime import datetime\n", + "\n", + "def getCurrentClock():\n", + " #return time.clock()\n", + " return datetime.now()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "from nets import ssd_vgg_300, ssd_common, np_methods\n", + "from preprocessing import ssd_vgg_preprocessing\n", + "from notebooks import visualization" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "# TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!\n", + "gpu_options = tf.GPUOptions(allow_growth=True)\n", + "config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)\n", + "isess = tf.InteractiveSession(config=config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "source": [ + "## SSD 300 Model\n", + "\n", + "The SSD 300 network takes 300x300 image inputs. 
In order to feed any image, the latter is resize to this input shape (i.e.`Resize.WARP_RESIZE`). Note that even though it may change the ratio width / height, the SSD model performs well on resized images (and it is the default behaviour in the original Caffe implementation).\n", + "\n", + "SSD anchors correspond to the default bounding boxes encoded in the network. The SSD net output provides offset on the coordinates and dimensions of these anchors." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true, + "scrolled": false + }, + "outputs": [], + "source": [ + "# Input placeholder.\n", + "net_shape = (300, 300)\n", + "data_format = 'NCHW'\n", + "img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))\n", + "# Evaluation pre-processing: resize to SSD net shape.\n", + "image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(\n", + " img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)\n", + "image_4d = tf.expand_dims(image_pre, 0)\n", + "\n", + "# Define the SSD model.\n", + "reuse = True if 'ssd_net' in locals() else None\n", + "ssd_net = ssd_vgg_300.SSDNet()\n", + "with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):\n", + " predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)\n", + "\n", + "# Restore SSD model.\n", + "ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'\n", + "# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'\n", + "isess.run(tf.global_variables_initializer())\n", + "saver = tf.train.Saver()\n", + "saver.restore(isess, ckpt_filename)\n", + "\n", + "# SSD default anchor boxes.\n", + "ssd_anchors = ssd_net.anchors(net_shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Post-processing pipeline\n", + "\n", + "The SSD outputs need to be 
post-processed to provide proper detections. Namely, we follow these common steps:\n", + "\n", + "* Select boxes above a classification threshold;\n", + "* Clip boxes to the image shape;\n", + "* Apply the Non-Maximum-Selection algorithm: fuse together boxes whose Jaccard score > threshold;\n", + "* If necessary, resize bounding boxes to original image shape." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "# Main image processing routine.\n", + "def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):\n", + " # Run SSD network.\n", + " rimg, rpredictions, rlocalisations, rbbox_img = isess.run([image_4d, predictions, localisations, bbox_img],\n", + " feed_dict={img_input: img})\n", + " \n", + " # Get classes and bboxes from the net outputs.\n", + " rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(\n", + " rpredictions, rlocalisations, ssd_anchors,\n", + " select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)\n", + " \n", + " rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)\n", + " rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)\n", + " rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)\n", + " # Resize bboxes to original image shape. 
Note: useless for Resize.WARP!\n", + " rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)\n", + " return rclasses, rscores, rbboxes" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true, + "scrolled": true + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'vid' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetCurrentClock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 108\u001b[0;31m \u001b[0mrclasses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrscores\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrbboxes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprocess_image\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 109\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrclasses\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mprocess_image\u001b[0;34m(img, select_threshold, nms_threshold, net_shape)\u001b[0m\n\u001b[1;32m 4\u001b[0m rimg, rpredictions, rlocalisations, rbbox_img = isess.run([image_4d, predictions, localisations, bbox_img],\n\u001b[0;32m----> 5\u001b[0;31m feed_dict={img_input: img})\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, 
run_metadata)\u001b[0m\n\u001b[1;32m 766\u001b[0m result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[0;32m--> 767\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 768\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 964\u001b[0m results = self._do_run(handle, final_targets, final_fetches,\n\u001b[0;32m--> 965\u001b[0;31m feed_dict_string, options, run_metadata)\n\u001b[0m\u001b[1;32m 966\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_run\u001b[0;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 1014\u001b[0m return self._do_call(_run_fn, self._session, feed_dict, fetch_list,\n\u001b[0;32m-> 1015\u001b[0;31m target_list, options, run_metadata)\n\u001b[0m\u001b[1;32m 1016\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m 1021\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1022\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1023\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m 1003\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1004\u001b[0;31m status, run_metadata)\n\u001b[0m\u001b[1;32m 1005\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: ", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 145\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0;31m# Release the Video Device\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 147\u001b[0;31m \u001b[0mvid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelease\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 148\u001b[0m \u001b[0;31m# Message to be displayed after releasing the device\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0mprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"Released Video Resource\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'vid' is not defined" + ] + } + ], + "source": [ + "# Test on some demo image and visualize output.\n", + "#path = '../demo/'\n", + "#image_names = sorted(os.listdir(path))\n", + "\n", + "img = io.imread(\"http://www.searchamateur.com/pictures/street-cars-second-life.jpg\") #not all detected\n", + "\n", + "start_time = time.time()\n", + "rclasses, rscores, rbboxes = 
process_image(img)\n", + "# visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)\n", + "visualization.plt_bboxes(img, rclasses, rscores, rbboxes)\n", + "print(\"--- %s seconds ---\" % (time.time() - start_time))\n", + "\n", + "#A smooth drive in The Crew on PS4 - OSSDC Simulator ACC Train 30fps\n", + "videoUrl = subprocess.Popen(\"youtube-dl -f22 -g https://www.youtube.com/watch?v=uuQlMCMT71I\", shell=True, stdout=subprocess.PIPE).stdout.read()\n", + "videoUrl = videoUrl.decode(\"utf-8\").rstrip()\n", + "print(\"videoUrl =\",videoUrl)\n", + "\n", + "webcam=False\n", + "#webcam=True\n", + "\n", + "if webcam:\n", + " cap = WebcamVideoStream(videoUrl).start()\n", + "else:\n", + " cap = cv2.VideoCapture(videoUrl)\n", + "\n", + "count=50\n", + "skip=1000\n", + "SKIP_EVERY=150 #pick a frame every 5 seconds\n", + "\n", + "count=1000000\n", + "skip=0 #int(7622-5)\n", + "SKIP_EVERY=0\n", + "\n", + "every=SKIP_EVERY\n", + "initial_time = getCurrentClock()\n", + "flag=True\n", + "\n", + "frameCnt=0\n", + "prevFrameCnt=0\n", + "prevTime=getCurrentClock()\n", + "\n", + "showImage=False\n", + "showImage=True\n", + "processImage=False\n", + "processImage=True\n", + "zoomImage=0\n", + "rclasses = []\n", + "rscores = []\n", + "rbboxes = []\n", + "\n", + "record = False\n", + "#record = True\n", + "\n", + "out = None\n", + "if record:\n", + " fourcc = cv2.VideoWriter_fourcc(*'MPEG')\n", + " timestr = time.strftime(\"%Y%m%d-%H%M%S\")\n", + " out = cv2.VideoWriter('output-'+timestr+'.mp4',fourcc, 30.0, (int(procWidth),int(procHeight)))\n", + " \n", + "try:\n", + " while True:\n", + " #frame = cap.read()\n", + " #if True:\n", + " if webcam or cap.grab():\n", + " if webcam:\n", + " frame = cap.read()\n", + " else:\n", + " flag, frame = cap.retrieve() \n", + " if not flag:\n", + " continue\n", + " else:\n", + " frameCnt=frameCnt+1\n", + " nowMicro = getCurrentClock()\n", + " delta = (nowMicro-prevTime).total_seconds()\n", + " #print(\"%f \" % 
(delta))\n", + " if delta>=1.0:\n", + " #print(\"FPS = %0.4f\" % ((frameCnt-prevFrameCnt)/delta))\n", + " prevTime = nowMicro\n", + " prevFrameCnt=frameCnt\n", + "\n", + " if skip>0:\n", + " skip=skip-1\n", + " continue\n", + "\n", + " if every>0:\n", + " every=every-1\n", + " continue\n", + " every=SKIP_EVERY\n", + "\n", + " count=count-1\n", + " if count==0:\n", + " break\n", + "\n", + " img = frame\n", + " if processImage: \n", + " if zoomImage>0:\n", + " #crop center of image, crop width is output_side_length\n", + " output_side_length = int(1920/zoomImage)\n", + " height, width, depth = frame.shape\n", + " #print (height, width, depth)\n", + " height_offset = int((height - output_side_length) / 2)\n", + " width_offset = int((width - output_side_length) / 2)\n", + " #print (height, width, depth, height_offset,width_offset,output_side_length)\n", + " img = frame[height_offset:height_offset + output_side_length,width_offset:width_offset + output_side_length]\n", + "\n", + " img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n", + " start_time = getCurrentClock()\n", + " rclasses, rscores, rbboxes = process_image(img)\n", + " if len(rclasses)>0:\n", + " nowMicro = getCurrentClock()\n", + " #print(\"# %s - %s - %0.4f seconds ---\" % (frameCnt,rclasses.astype('|S3'), (nowMicro - start_time).total_seconds()))\n", + " start_time = nowMicro\n", + " if showImage:\n", + " visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)\n", + " if showImage:\n", + " #visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)\n", + " #visualization.plt_bboxes(img, rclasses, rscores, rbboxes)\n", + " if processImage:\n", + " img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)\n", + " #cv2.imshow(\"ssd\",img)\n", + " imshow(img)\n", + " show()\n", + " # Display the frame until new frame is available\n", + " clear_output(wait=True)\n", + " if record:\n", + " #if processImage:\n", + " #img = cv2.cvtColor(img, 
cv2.COLOR_RGB2BGR)\n", + " newimage = cv2.resize(img,(procWidth,procHeight))\n", + " out.write(newimage)\n", + " key = cv2.waitKey(1)\n", + " if key == 27:\n", + " break\n", + " elif key == ord('u'):\n", + " showImage= not(showImage)\n", + " elif key == ord('p'):\n", + " processImage= not(processImage)\n", + " elif key == ord('z'):\n", + " zoomImage=zoomImage+1\n", + " if zoomImage==10:\n", + " zoomImage=0\n", + " elif key == ord('x'):\n", + " zoomImage=zoomImage-1\n", + " if zoomImage<0:\n", + " zoomImage=0\n", + "except KeyboardInterrupt:\n", + " # Release the Video Device\n", + " vid.release()\n", + " # Message to be displayed after releasing the device\n", + " print (\"Released Video Resource\")\n", + " \n", + "nowMicro = getCurrentClock()\n", + "print(\"# %s -- %0.4f seconds - FPS: %0.4f ---\" % (frameCnt, (nowMicro - initial_time).total_seconds(), frameCnt/(nowMicro - initial_time).total_seconds()))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + 
}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/ossdc-vbacc-ssd_notebook_cpu.py b/notebooks/ossdc-vbacc-ssd_notebook_cpu.py new file mode 100644 index 00000000..866f7dd4 --- /dev/null +++ b/notebooks/ossdc-vbacc-ssd_notebook_cpu.py @@ -0,0 +1,314 @@ + +# coding: utf-8 + +# In[1]: + +import os +import math +import random + +import numpy as np +import tensorflow as tf +import cv2 +import os +import argparse + + +from datetime import datetime + +slim = tf.contrib.slim + +# In[2]: + +#get_ipython().magic('matplotlib inline') +import matplotlib.pyplot as plt +import matplotlib.image as mpimg +from skimage import io +import time +import subprocess + + +# In[3]: + +import sys +sys.path.append('../') + +precision = 10 +def getCurrentClock(): + #return time.clock() + return datetime.now() + +# In[4]: + +from nets import ssd_vgg_300, ssd_vgg_512, ssd_common, np_methods +from preprocessing import ssd_vgg_preprocessing +from notebooks import visualization + +procWidth = 1280 #640 # processing width (x resolution) of frame +procHeight = 720 # processing width (x resolution) of frame + +# In[5]: + +# TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!! +gpu_options = tf.GPUOptions(allow_growth=True) +config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options) +isess = tf.InteractiveSession(config=config) + + +# ## SSD 300 Model +# +# The SSD 300 network takes 300x300 image inputs. In order to feed any image, the latter is resize to this input shape (i.e.`Resize.WARP_RESIZE`). Note that even though it may change the ratio width / height, the SSD model performs well on resized images (and it is the default behaviour in the original Caffe implementation). +# +# SSD anchors correspond to the default bounding boxes encoded in the network. The SSD net output provides offset on the coordinates and dimensions of these anchors. 
# In[6]:

# SSD input resolution: 300x300 (ssd_vgg_300) or 512x512 (ssd_vgg_512).
# The later assignment wins; remove/flip the second pair to select the 512 model.
shapeWidth = 512
shapeHeight = 512
shapeWidth = 300
shapeHeight = 300

# Input placeholder.
net_shape = (shapeWidth, shapeHeight)

# Construct the argument parser and parse the arguments.
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video", help="path to the video file")

args = vars(ap.parse_args())

# 'NHWC' is required on CPU; 'NCHW' is the faster layout on GPU.
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model; reuse variables if the net was already built in this
# interpreter (e.g. when re-running the script/cell interactively).
reuse = True if 'ssd_net' in locals() else None
if shapeWidth == 300:
    ssd_net = ssd_vgg_300.SSDNet()
else:
    ssd_net = ssd_vgg_512.SSDNet()

with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore SSD model.
# NOTE: tf.train.Saver.restore() takes the checkpoint *prefix*, not the
# '.index' file itself — the original appended '.index' to the 512 path,
# which makes restore fail for the 512 model.
if shapeWidth == 300:
    ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
else:
    ckpt_filename = ('../checkpoints/VGG_VOC0712_SSD_512x512_ft_iter_120000.ckpt/'
                     'VGG_VOC0712_SSD_512x512_ft_iter_120000.ckpt')
# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)


# ## Post-processing pipeline
#
# The SSD outputs need to be post-processed to provide proper detections:
#
# * Select boxes above a classification threshold;
# * Clip boxes to the image shape;
# * Apply the Non-Maximum-Selection algorithm: fuse together boxes whose
#   Jaccard score > threshold;
# * If necessary, resize bounding boxes to original image shape.
# In[7]:

# Main image processing routine.
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(shapeWidth, shapeHeight)):
    """Run the SSD network on one RGB image and post-process the raw output.

    Args:
        img: HxWx3 uint8 RGB image (any resolution; warped to ``net_shape``).
        select_threshold: minimum class score for a box to be kept.
        nms_threshold: Jaccard overlap above which boxes are fused by NMS.
        net_shape: SSD input resolution used during box selection.

    Returns:
        Tuple ``(rclasses, rscores, rbboxes)`` of the surviving detections,
        with bounding boxes expressed relative to the original image.
    """
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)

    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes


# In[10]:

start_time = getCurrentClock()

# Honor the optional --video argument (the original parsed it but never used
# it). Without it, resolve the demo stream URL via youtube-dl:
# "A smooth drive in The Crew on PS4 - OSSDC Simulator ACC Train 30fps".
videoUrl = args.get("video")
if not videoUrl:
    # List-form argv with the default shell=False avoids a shell round-trip,
    # and check_output fails loudly instead of silently yielding an empty URL.
    videoUrl = subprocess.check_output(
        ["youtube-dl", "-f22", "-g", "https://www.youtube.com/watch?v=uuQlMCMT71I"]
    ).decode("utf-8").rstrip()
print("videoUrl =", videoUrl)

from imutils.video import WebcamVideoStream

webcam = False
#webcam = True

if webcam:
    cap = WebcamVideoStream(videoUrl).start()
else:
    cap = cv2.VideoCapture(videoUrl)

# Frame budget / skipping knobs. The later assignments deliberately override
# the earlier ones — edit the first group to sample the video sparsely.
count = 50
skip = 0
SKIP_EVERY = 150  # pick a frame every 5 seconds (at 30 fps)

count = 1000000
SKIP_EVERY = 0

every = SKIP_EVERY
initial_time = getCurrentClock()
flag = True

frameCnt = 0
prevFrameCnt = 0
prevTime = getCurrentClock()
# Runtime toggles — each is also switchable from the keyboard while running.
showImage = True      # 'u': toggle display / detection overlay
processImage = True   # 'p': toggle running the SSD network
zoomImage = 0         # 'z'/'x': zoom into the frame center, 10 steps
rclasses = []
rscores = []
rbboxes = []

record = False
#record = True

out = None
if record:
    fourcc = cv2.VideoWriter_fourcc(*'MPEG')
    timestr = time.strftime("%Y%m%d-%H%M%S")
    out = cv2.VideoWriter('output-' + timestr + '.mp4', fourcc, 30.0,
                          (int(procWidth), int(procHeight)))

try:
    while True:
        if webcam or cap.grab():
            if webcam:
                frame = cap.read()
            else:
                flag, frame = cap.retrieve()
            if not flag:
                continue
            frameCnt = frameCnt + 1
            nowMicro = getCurrentClock()
            delta = (nowMicro - prevTime).total_seconds()
            #print("%f " % (delta))
            if delta >= 1.0:
                print("FPS = %0.4f" % ((frameCnt - prevFrameCnt) / delta))
                prevTime = nowMicro
                prevFrameCnt = frameCnt

            if skip > 0:
                skip = skip - 1
                continue

            if every > 0:
                every = every - 1
                continue
            every = SKIP_EVERY

            count = count - 1
            if count == 0:
                break

            img = frame
            if processImage:
                if zoomImage > 0:
                    # Crop the center of the image; crop width is output_side_length.
                    output_side_length = int(1920 / zoomImage)
                    height, width, depth = frame.shape
                    height_offset = int((height - output_side_length) / 2)
                    width_offset = int((width - output_side_length) / 2)
                    img = frame[height_offset:height_offset + output_side_length,
                                width_offset:width_offset + output_side_length]

                # OpenCV delivers BGR frames; the SSD pipeline expects RGB.
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                start_time = getCurrentClock()
                rclasses, rscores, rbboxes = process_image(img)
                if len(rclasses) > 0:
                    nowMicro = getCurrentClock()
                    print("# %s - %s - %0.4f seconds ---" % (
                        frameCnt, rclasses.astype('|S3'),
                        (nowMicro - start_time).total_seconds()))
                    start_time = nowMicro
                    if showImage:
                        visualization.bboxes_draw_on_img(
                            img, rclasses, rscores, rbboxes, visualization.colors_plasma)
            if showImage:
                if processImage:
                    # Convert back to BGR so imshow renders correct colors.
                    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                cv2.imshow("ssd", img)
            if record:
                newimage = cv2.resize(img, (procWidth, procHeight))
                out.write(newimage)
            key = cv2.waitKey(1)
            if key == 27:  # ESC quits
                break
            elif key == ord('u'):
                showImage = not showImage
            elif key == ord('p'):
                processImage = not processImage
            elif key == ord('z'):
                zoomImage = zoomImage + 1
                if zoomImage == 10:
                    zoomImage = 0
            elif key == ord('x'):
                zoomImage = zoomImage - 1
                if zoomImage < 0:
                    zoomImage = 0
finally:
    # The original never released its resources; make sure the capture, the
    # recorder and any OpenCV windows are cleaned up on every exit path
    # (normal break, Ctrl-C, or an exception inside the loop).
    if not webcam:
        cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

nowMicro = getCurrentClock()
print("# %s -- %0.4f seconds - FPS: %0.4f ---" % (
    frameCnt, (nowMicro - initial_time).total_seconds(),
    frameCnt / (nowMicro - initial_time).total_seconds()))


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]: