In [35]:
import cv2
import imageio
imageio.plugins.ffmpeg.download()
from PIL import Image, ImageDraw, ImageFont

In [18]:
import argparse
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import PIL
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Lambda, Conv2D
from keras.models import load_model, Model
from yolo_utils import read_classes, read_anchors, generate_colors, preprocess_image, draw_boxes, scale_boxes
from yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body

%matplotlib inline

In [19]:
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = .6):
    
    box_scores = box_confidence*box_class_probs
    
    box_classes = K.argmax(box_scores,axis=-1)
    box_class_scores = K.max(box_scores,axis=-1)
    
    filtering_mask = box_class_scores>threshold
    
    scores = tf.boolean_mask(box_class_scores,filtering_mask)
    boxes = tf.boolean_mask(boxes,filtering_mask)
    classes = tf.boolean_mask(box_classes,filtering_mask)
    
    return scores, boxes, classes

In [20]:
def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
   
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([max_boxes_tensor])) 
    
    nms_indices = tf.image.non_max_suppression(boxes,scores,max_boxes_tensor,iou_threshold)
    scores = K.gather(scores,nms_indices)
    boxes = K.gather(boxes,nms_indices)
    classes = K.gather(classes,nms_indices)
    
    return scores, boxes, classes

In [21]:
def yolo_eval(yolo_outputs, image_shape = (720., 1280.), max_boxes=10, score_threshold=.4, iou_threshold=.5):
    
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs

    boxes = yolo_boxes_to_corners(box_xy, box_wh)

    scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = score_threshold)
    
    boxes = scale_boxes(boxes, image_shape)

    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)

    return scores, boxes, classes

In [22]:
sess=K.get_session()

In [23]:
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")
image_shape = (720., 1280.)    

In [24]:
yolo_model = load_model("model_data/yolo.h5")



In [25]:
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))

In [26]:
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)

In [42]:
def predict(sess, image_file):
    
    image, image_data = preprocess_image("images/" + image_file, model_image_size = (608, 608))
    out_scores, out_boxes, out_classes = sess.run([scores,boxes,classes],feed_dict={yolo_model.input:image_data,K.learning_phase():0})
    print('Found {} boxes for {}'.format(len(out_boxes), image_file))
    colors = generate_colors(class_names)
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
    image.save(os.path.join("out", image_file), quality=90)
    output_image = scipy.misc.imread(os.path.join("out", image_file))
    imshow(output_image)
    
    return out_scores, out_boxes, out_classes

In [None]:
out_scores, out_boxes, out_classes = predict(sess, "test.jpg")

In [43]:
def predict(sess, image_np):
    
    image, image_data = preprocess_image(image_np, model_image_size = (608, 608))

    out_scores, out_boxes, out_classes = sess.run([scores,boxes,classes],feed_dict={yolo_model.input:image_data,K.learning_phase():0})

    return out_scores, out_boxes, out_classes

In [45]:
input_video="trial3"
video_reader=imageio.get_reader('%s.mp4'%input_video)
video_writer=imageio.get_writer('%s_annotated.mp4'% input_video,fps=10)
for frame in video_reader:
    image_np=frame
    out_scores, out_boxes, out_classes = predict(sess,image_np)
    colors = generate_colors(class_names)
    image = Image.fromarray(image_np, 'RGB')
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
    image_np=np.array(image)
    video_writer.append_data(image_np)
video_writer.close()



tvmonitor 0.40 (1201, 75) (720, 175)
cup 0.42 (687, 546) (720, 404)
cup 0.44 (597, 542) (656, 404)
bottle 0.53 (513, 401) (540, 404)
sofa 0.59 (2, 265) (371, 404)
person 0.72 (692, 206) (720, 404)
person 0.75 (423, 187) (612, 404)
chair 0.79 (945, 294) (720, 404)
person 0.83 (249, 232) (470, 404)
tvmonitor 0.40 (1201, 75) (720, 175)
cup 0.42 (687, 546) (720, 404)
cup 0.44 (597, 542) (656, 404)
bottle 0.53 (513, 401) (540, 404)
sofa 0.59 (2, 265) (371, 404)
person 0.72 (692, 206) (720, 404)
person 0.75 (423, 187) (612, 404)
chair 0.79 (945, 294) (720, 404)
person 0.83 (249, 232) (470, 404)
tvmonitor 0.41 (1201, 75) (720, 175)
cup 0.42 (687, 545) (720, 404)
cup 0.44 (597, 542) (655, 404)
bottle 0.52 (513, 401) (540, 404)
sofa 0.59 (1, 264) (371, 404)
person 0.70 (695, 209) (720, 404)
person 0.74 (423, 187) (612, 404)
chair 0.79 (946, 294) (720, 404)
person 0.84 (249, 231) (469, 404)
tvmonitor 0.41 (1201, 75) (720, 175)
cup 0.42 (687, 545) (720, 404)
cup 0.44 (597, 542) (655, 404)
bottle 

cup 0.54 (686, 543) (720, 404)
sofa 0.54 (0, 264) (381, 404)
person 0.69 (682, 206) (720, 404)
person 0.73 (432, 216) (607, 404)
person 0.78 (928, 46) (720, 355)
person 0.80 (248, 235) (465, 404)
chair 0.80 (943, 299) (720, 404)
