From 26fa7eea5155ac3989476936628e62be3d773b95 Mon Sep 17 00:00:00 2001 From: Marius Slavescu Date: Thu, 20 Apr 2017 08:18:09 -0400 Subject: [PATCH] SSD Tensorflow scripts for OSSDC.org Vision Based Adaptive cruise control, later this code will be moved here: https://github.com/OSSDC/OSSDC-VisionBasedACC A video with the results is here: https://www.youtube.com/watch?v=dqnjHqwP68Y Run this script (change the URL insde to point to other YouTube video): cd SSD-Tensorflow/notebooks python3 ossdc-vbacc-ssd_notebook_cpu.py to see the SSD detection results against this video: #A smooth drive in The Crew on PS4 - OSSDC Simulator ACC Train 30fps https://www.youtube.com/watch?v=uuQlMCMT71I Use the IPython Notebook in Jupyter, against same video, with inline live image result, all in browser: ossdc-vbacc-ssd_notebook.ipynb These scripts are using GPU by default it will need some customizations for CPU. To test it directly on Youtube videos download youtube-ld (used above in both scripts) from: https://rg3.github.io/youtube-dl/download.html --- notebooks/ossdc-vbacc-ssd_notebook.ipynb | 460 ++++++++++++++++++++++ notebooks/ossdc-vbacc-ssd_notebook_cpu.py | 314 +++++++++++++++ 2 files changed, 774 insertions(+) create mode 100644 notebooks/ossdc-vbacc-ssd_notebook.ipynb create mode 100644 notebooks/ossdc-vbacc-ssd_notebook_cpu.py diff --git a/notebooks/ossdc-vbacc-ssd_notebook.ipynb b/notebooks/ossdc-vbacc-ssd_notebook.ipynb new file mode 100644 index 00000000..03dcdbd0 --- /dev/null +++ b/notebooks/ossdc-vbacc-ssd_notebook.ipynb @@ -0,0 +1,460 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Populating the interactive namespace from numpy and matplotlib\n" + ] + } + ], + "source": [ + "import os\n", + "import math\n", + "#import random\n", + "\n", + "import numpy as np\n", + "import tensorflow 
as tf\n", + "import cv2\n", + "import os\n", + "\n", + "from imutils.video import WebcamVideoStream\n", + "\n", + "slim = tf.contrib.slim\n", + "\n", + "%pylab inline \n", + "from IPython.display import clear_output" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "#%matplotlib inline\n", + "#import matplotlib.pyplot as plt\n", + "#import matplotlib.image as mpimg\n", + "from skimage import io\n", + "import time\n", + "import subprocess\n", + "\n", + "precision = 10\n", + "from datetime import datetime\n", + "\n", + "def getCurrentClock():\n", + " #return time.clock()\n", + " return datetime.now()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "from nets import ssd_vgg_300, ssd_common, np_methods\n", + "from preprocessing import ssd_vgg_preprocessing\n", + "from notebooks import visualization" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "# TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!\n", + "gpu_options = tf.GPUOptions(allow_growth=True)\n", + "config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)\n", + "isess = tf.InteractiveSession(config=config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "source": [ + "## SSD 300 Model\n", + "\n", + "The SSD 300 network takes 300x300 image inputs. 
In order to feed any image, the latter is resize to this input shape (i.e.`Resize.WARP_RESIZE`). Note that even though it may change the ratio width / height, the SSD model performs well on resized images (and it is the default behaviour in the original Caffe implementation).\n", + "\n", + "SSD anchors correspond to the default bounding boxes encoded in the network. The SSD net output provides offset on the coordinates and dimensions of these anchors." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true, + "scrolled": false + }, + "outputs": [], + "source": [ + "# Input placeholder.\n", + "net_shape = (300, 300)\n", + "data_format = 'NCHW'\n", + "img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))\n", + "# Evaluation pre-processing: resize to SSD net shape.\n", + "image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(\n", + " img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)\n", + "image_4d = tf.expand_dims(image_pre, 0)\n", + "\n", + "# Define the SSD model.\n", + "reuse = True if 'ssd_net' in locals() else None\n", + "ssd_net = ssd_vgg_300.SSDNet()\n", + "with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):\n", + " predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)\n", + "\n", + "# Restore SSD model.\n", + "ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'\n", + "# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'\n", + "isess.run(tf.global_variables_initializer())\n", + "saver = tf.train.Saver()\n", + "saver.restore(isess, ckpt_filename)\n", + "\n", + "# SSD default anchor boxes.\n", + "ssd_anchors = ssd_net.anchors(net_shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Post-processing pipeline\n", + "\n", + "The SSD outputs need to be 
post-processed to provide proper detections. Namely, we follow these common steps:\n", + "\n", + "* Select boxes above a classification threshold;\n", + "* Clip boxes to the image shape;\n", + "* Apply the Non-Maximum-Selection algorithm: fuse together boxes whose Jaccard score > threshold;\n", + "* If necessary, resize bounding boxes to original image shape." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "# Main image processing routine.\n", + "def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):\n", + " # Run SSD network.\n", + " rimg, rpredictions, rlocalisations, rbbox_img = isess.run([image_4d, predictions, localisations, bbox_img],\n", + " feed_dict={img_input: img})\n", + " \n", + " # Get classes and bboxes from the net outputs.\n", + " rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(\n", + " rpredictions, rlocalisations, ssd_anchors,\n", + " select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)\n", + " \n", + " rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)\n", + " rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)\n", + " rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)\n", + " # Resize bboxes to original image shape. 
Note: useless for Resize.WARP!\n", + " rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)\n", + " return rclasses, rscores, rbboxes" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true, + "scrolled": true + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'vid' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetCurrentClock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 108\u001b[0;31m \u001b[0mrclasses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrscores\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrbboxes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprocess_image\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 109\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrclasses\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mprocess_image\u001b[0;34m(img, select_threshold, nms_threshold, net_shape)\u001b[0m\n\u001b[1;32m 4\u001b[0m rimg, rpredictions, rlocalisations, rbbox_img = isess.run([image_4d, predictions, localisations, bbox_img],\n\u001b[0;32m----> 5\u001b[0;31m feed_dict={img_input: img})\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, 
run_metadata)\u001b[0m\n\u001b[1;32m 766\u001b[0m result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[0;32m--> 767\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 768\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 964\u001b[0m results = self._do_run(handle, final_targets, final_fetches,\n\u001b[0;32m--> 965\u001b[0;31m feed_dict_string, options, run_metadata)\n\u001b[0m\u001b[1;32m 966\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_run\u001b[0;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 1014\u001b[0m return self._do_call(_run_fn, self._session, feed_dict, fetch_list,\n\u001b[0;32m-> 1015\u001b[0;31m target_list, options, run_metadata)\n\u001b[0m\u001b[1;32m 1016\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m 1021\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1022\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1023\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m 1003\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1004\u001b[0;31m status, run_metadata)\n\u001b[0m\u001b[1;32m 1005\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: ", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 145\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0;31m# Release the Video Device\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 147\u001b[0;31m \u001b[0mvid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelease\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 148\u001b[0m \u001b[0;31m# Message to be displayed after releasing the device\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0mprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"Released Video Resource\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'vid' is not defined" + ] + } + ], + "source": [ + "# Test on some demo image and visualize output.\n", + "#path = '../demo/'\n", + "#image_names = sorted(os.listdir(path))\n", + "\n", + "img = io.imread(\"http://www.searchamateur.com/pictures/street-cars-second-life.jpg\") #not all detected\n", + "\n", + "start_time = time.time()\n", + "rclasses, rscores, rbboxes = 
process_image(img)\n", + "# visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)\n", + "visualization.plt_bboxes(img, rclasses, rscores, rbboxes)\n", + "print(\"--- %s seconds ---\" % (time.time() - start_time))\n", + "\n", + "#A smooth drive in The Crew on PS4 - OSSDC Simulator ACC Train 30fps\n", + "videoUrl = subprocess.Popen(\"youtube-dl -f22 -g https://www.youtube.com/watch?v=uuQlMCMT71I\", shell=True, stdout=subprocess.PIPE).stdout.read()\n", + "videoUrl = videoUrl.decode(\"utf-8\").rstrip()\n", + "print(\"videoUrl =\",videoUrl)\n", + "\n", + "webcam=False\n", + "#webcam=True\n", + "\n", + "if webcam:\n", + " cap = WebcamVideoStream(videoUrl).start()\n", + "else:\n", + " cap = cv2.VideoCapture(videoUrl)\n", + "\n", + "count=50\n", + "skip=1000\n", + "SKIP_EVERY=150 #pick a frame every 5 seconds\n", + "\n", + "count=1000000\n", + "skip=0 #int(7622-5)\n", + "SKIP_EVERY=0\n", + "\n", + "every=SKIP_EVERY\n", + "initial_time = getCurrentClock()\n", + "flag=True\n", + "\n", + "frameCnt=0\n", + "prevFrameCnt=0\n", + "prevTime=getCurrentClock()\n", + "\n", + "showImage=False\n", + "showImage=True\n", + "processImage=False\n", + "processImage=True\n", + "zoomImage=0\n", + "rclasses = []\n", + "rscores = []\n", + "rbboxes = []\n", + "\n", + "record = False\n", + "#record = True\n", + "\n", + "out = None\n", + "if record:\n", + " fourcc = cv2.VideoWriter_fourcc(*'MPEG')\n", + " timestr = time.strftime(\"%Y%m%d-%H%M%S\")\n", + " out = cv2.VideoWriter('output-'+timestr+'.mp4',fourcc, 30.0, (int(procWidth),int(procHeight)))\n", + " \n", + "try:\n", + " while True:\n", + " #frame = cap.read()\n", + " #if True:\n", + " if webcam or cap.grab():\n", + " if webcam:\n", + " frame = cap.read()\n", + " else:\n", + " flag, frame = cap.retrieve() \n", + " if not flag:\n", + " continue\n", + " else:\n", + " frameCnt=frameCnt+1\n", + " nowMicro = getCurrentClock()\n", + " delta = (nowMicro-prevTime).total_seconds()\n", + " #print(\"%f \" % 
(delta))\n", + " if delta>=1.0:\n", + " #print(\"FPS = %0.4f\" % ((frameCnt-prevFrameCnt)/delta))\n", + " prevTime = nowMicro\n", + " prevFrameCnt=frameCnt\n", + "\n", + " if skip>0:\n", + " skip=skip-1\n", + " continue\n", + "\n", + " if every>0:\n", + " every=every-1\n", + " continue\n", + " every=SKIP_EVERY\n", + "\n", + " count=count-1\n", + " if count==0:\n", + " break\n", + "\n", + " img = frame\n", + " if processImage: \n", + " if zoomImage>0:\n", + " #crop center of image, crop width is output_side_length\n", + " output_side_length = int(1920/zoomImage)\n", + " height, width, depth = frame.shape\n", + " #print (height, width, depth)\n", + " height_offset = int((height - output_side_length) / 2)\n", + " width_offset = int((width - output_side_length) / 2)\n", + " #print (height, width, depth, height_offset,width_offset,output_side_length)\n", + " img = frame[height_offset:height_offset + output_side_length,width_offset:width_offset + output_side_length]\n", + "\n", + " img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n", + " start_time = getCurrentClock()\n", + " rclasses, rscores, rbboxes = process_image(img)\n", + " if len(rclasses)>0:\n", + " nowMicro = getCurrentClock()\n", + " #print(\"# %s - %s - %0.4f seconds ---\" % (frameCnt,rclasses.astype('|S3'), (nowMicro - start_time).total_seconds()))\n", + " start_time = nowMicro\n", + " if showImage:\n", + " visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)\n", + " if showImage:\n", + " #visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)\n", + " #visualization.plt_bboxes(img, rclasses, rscores, rbboxes)\n", + " if processImage:\n", + " img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)\n", + " #cv2.imshow(\"ssd\",img)\n", + " imshow(img)\n", + " show()\n", + " # Display the frame until new frame is available\n", + " clear_output(wait=True)\n", + " if record:\n", + " #if processImage:\n", + " #img = cv2.cvtColor(img, 
cv2.COLOR_RGB2BGR)\n", + " newimage = cv2.resize(img,(procWidth,procHeight))\n", + " out.write(newimage)\n", + " key = cv2.waitKey(1)\n", + " if key == 27:\n", + " break\n", + " elif key == ord('u'):\n", + " showImage= not(showImage)\n", + " elif key == ord('p'):\n", + " processImage= not(processImage)\n", + " elif key == ord('z'):\n", + " zoomImage=zoomImage+1\n", + " if zoomImage==10:\n", + " zoomImage=0\n", + " elif key == ord('x'):\n", + " zoomImage=zoomImage-1\n", + " if zoomImage<0:\n", + " zoomImage=0\n", + "except KeyboardInterrupt:\n", + " # Release the Video Device\n", + " vid.release()\n", + " # Message to be displayed after releasing the device\n", + " print (\"Released Video Resource\")\n", + " \n", + "nowMicro = getCurrentClock()\n", + "print(\"# %s -- %0.4f seconds - FPS: %0.4f ---\" % (frameCnt, (nowMicro - initial_time).total_seconds(), frameCnt/(nowMicro - initial_time).total_seconds()))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + 
}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/ossdc-vbacc-ssd_notebook_cpu.py b/notebooks/ossdc-vbacc-ssd_notebook_cpu.py new file mode 100644 index 00000000..866f7dd4 --- /dev/null +++ b/notebooks/ossdc-vbacc-ssd_notebook_cpu.py @@ -0,0 +1,314 @@ + +# coding: utf-8 + +# In[1]: + +import os +import math +import random + +import numpy as np +import tensorflow as tf +import cv2 +import os +import argparse + + +from datetime import datetime + +slim = tf.contrib.slim + +# In[2]: + +#get_ipython().magic('matplotlib inline') +import matplotlib.pyplot as plt +import matplotlib.image as mpimg +from skimage import io +import time +import subprocess + + +# In[3]: + +import sys +sys.path.append('../') + +precision = 10 +def getCurrentClock(): + #return time.clock() + return datetime.now() + +# In[4]: + +from nets import ssd_vgg_300, ssd_vgg_512, ssd_common, np_methods +from preprocessing import ssd_vgg_preprocessing +from notebooks import visualization + +procWidth = 1280 #640 # processing width (x resolution) of frame +procHeight = 720 # processing width (x resolution) of frame + +# In[5]: + +# TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!! +gpu_options = tf.GPUOptions(allow_growth=True) +config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options) +isess = tf.InteractiveSession(config=config) + + +# ## SSD 300 Model +# +# The SSD 300 network takes 300x300 image inputs. In order to feed any image, the latter is resize to this input shape (i.e.`Resize.WARP_RESIZE`). Note that even though it may change the ratio width / height, the SSD model performs well on resized images (and it is the default behaviour in the original Caffe implementation). +# +# SSD anchors correspond to the default bounding boxes encoded in the network. The SSD net output provides offset on the coordinates and dimensions of these anchors. 
# In[6]:

# SSD input resolution: 300x300 (ssd_vgg_300) or 512x512 (ssd_vgg_512).
# The later assignment wins; remove/flip the second pair to select the 512 model.
shapeWidth = 512
shapeHeight = 512
shapeWidth = 300
shapeHeight = 300

# Input placeholder.
net_shape = (shapeWidth, shapeHeight)

# Construct the argument parser and parse the arguments.
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video", help="path to the video file")

args = vars(ap.parse_args())

# 'NHWC' is required on CPU; 'NCHW' is the faster layout on GPU.
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model; reuse variables if the net was already built in this
# interpreter (e.g. when re-running the script/cell interactively).
reuse = True if 'ssd_net' in locals() else None
if shapeWidth == 300:
    ssd_net = ssd_vgg_300.SSDNet()
else:
    ssd_net = ssd_vgg_512.SSDNet()

with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore SSD model.
# NOTE: tf.train.Saver.restore() takes the checkpoint *prefix*, not the
# '.index' file itself — the original appended '.index' to the 512 path,
# which makes restore fail for the 512 model.
if shapeWidth == 300:
    ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
else:
    ckpt_filename = ('../checkpoints/VGG_VOC0712_SSD_512x512_ft_iter_120000.ckpt/'
                     'VGG_VOC0712_SSD_512x512_ft_iter_120000.ckpt')
# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)


# ## Post-processing pipeline
#
# The SSD outputs need to be post-processed to provide proper detections:
#
# * Select boxes above a classification threshold;
# * Clip boxes to the image shape;
# * Apply the Non-Maximum-Selection algorithm: fuse together boxes whose
#   Jaccard score > threshold;
# * If necessary, resize bounding boxes to original image shape.
# In[7]:

# Main image processing routine.
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(shapeWidth, shapeHeight)):
    """Run the SSD network on one RGB image and post-process the raw output.

    Args:
        img: HxWx3 uint8 RGB image (any resolution; warped to ``net_shape``).
        select_threshold: minimum class score for a box to be kept.
        nms_threshold: Jaccard overlap above which boxes are fused by NMS.
        net_shape: SSD input resolution used during box selection.

    Returns:
        Tuple ``(rclasses, rscores, rbboxes)`` of the surviving detections,
        with bounding boxes expressed relative to the original image.
    """
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)

    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes


# In[10]:

start_time = getCurrentClock()

# Honor the optional --video argument (the original parsed it but never used
# it). Without it, resolve the demo stream URL via youtube-dl:
# "A smooth drive in The Crew on PS4 - OSSDC Simulator ACC Train 30fps".
videoUrl = args.get("video")
if not videoUrl:
    # List-form argv with the default shell=False avoids a shell round-trip,
    # and check_output fails loudly instead of silently yielding an empty URL.
    videoUrl = subprocess.check_output(
        ["youtube-dl", "-f22", "-g", "https://www.youtube.com/watch?v=uuQlMCMT71I"]
    ).decode("utf-8").rstrip()
print("videoUrl =", videoUrl)

from imutils.video import WebcamVideoStream

webcam = False
#webcam = True

if webcam:
    cap = WebcamVideoStream(videoUrl).start()
else:
    cap = cv2.VideoCapture(videoUrl)

# Frame budget / skipping knobs. The later assignments deliberately override
# the earlier ones — edit the first group to sample the video sparsely.
count = 50
skip = 0
SKIP_EVERY = 150  # pick a frame every 5 seconds (at 30 fps)

count = 1000000
SKIP_EVERY = 0

every = SKIP_EVERY
initial_time = getCurrentClock()
flag = True

frameCnt = 0
prevFrameCnt = 0
prevTime = getCurrentClock()
# Runtime toggles — each is also switchable from the keyboard while running.
showImage = True      # 'u': toggle display / detection overlay
processImage = True   # 'p': toggle running the SSD network
zoomImage = 0         # 'z'/'x': zoom into the frame center, 10 steps
rclasses = []
rscores = []
rbboxes = []

record = False
#record = True

out = None
if record:
    fourcc = cv2.VideoWriter_fourcc(*'MPEG')
    timestr = time.strftime("%Y%m%d-%H%M%S")
    out = cv2.VideoWriter('output-' + timestr + '.mp4', fourcc, 30.0,
                          (int(procWidth), int(procHeight)))

try:
    while True:
        if webcam or cap.grab():
            if webcam:
                frame = cap.read()
            else:
                flag, frame = cap.retrieve()
            if not flag:
                continue
            frameCnt = frameCnt + 1
            nowMicro = getCurrentClock()
            delta = (nowMicro - prevTime).total_seconds()
            #print("%f " % (delta))
            if delta >= 1.0:
                print("FPS = %0.4f" % ((frameCnt - prevFrameCnt) / delta))
                prevTime = nowMicro
                prevFrameCnt = frameCnt

            if skip > 0:
                skip = skip - 1
                continue

            if every > 0:
                every = every - 1
                continue
            every = SKIP_EVERY

            count = count - 1
            if count == 0:
                break

            img = frame
            if processImage:
                if zoomImage > 0:
                    # Crop the center of the image; crop width is output_side_length.
                    output_side_length = int(1920 / zoomImage)
                    height, width, depth = frame.shape
                    height_offset = int((height - output_side_length) / 2)
                    width_offset = int((width - output_side_length) / 2)
                    img = frame[height_offset:height_offset + output_side_length,
                                width_offset:width_offset + output_side_length]

                # OpenCV delivers BGR frames; the SSD pipeline expects RGB.
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                start_time = getCurrentClock()
                rclasses, rscores, rbboxes = process_image(img)
                if len(rclasses) > 0:
                    nowMicro = getCurrentClock()
                    print("# %s - %s - %0.4f seconds ---" % (
                        frameCnt, rclasses.astype('|S3'),
                        (nowMicro - start_time).total_seconds()))
                    start_time = nowMicro
                    if showImage:
                        visualization.bboxes_draw_on_img(
                            img, rclasses, rscores, rbboxes, visualization.colors_plasma)
            if showImage:
                if processImage:
                    # Convert back to BGR so imshow renders correct colors.
                    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                cv2.imshow("ssd", img)
            if record:
                newimage = cv2.resize(img, (procWidth, procHeight))
                out.write(newimage)
            key = cv2.waitKey(1)
            if key == 27:  # ESC quits
                break
            elif key == ord('u'):
                showImage = not showImage
            elif key == ord('p'):
                processImage = not processImage
            elif key == ord('z'):
                zoomImage = zoomImage + 1
                if zoomImage == 10:
                    zoomImage = 0
            elif key == ord('x'):
                zoomImage = zoomImage - 1
                if zoomImage < 0:
                    zoomImage = 0
finally:
    # The original never released its resources; make sure the capture, the
    # recorder and any OpenCV windows are cleaned up on every exit path
    # (normal break, Ctrl-C, or an exception inside the loop).
    if not webcam:
        cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

nowMicro = getCurrentClock()
print("# %s -- %0.4f seconds - FPS: %0.4f ---" % (
    frameCnt, (nowMicro - initial_time).total_seconds(),
    frameCnt / (nowMicro - initial_time).total_seconds()))


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]: