In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

from darkflow.net.build import TFNet
import cv2
import tensorflow as tf
import os
import math
import xml.etree.ElementTree as ET
import time

In [2]:
# Settings handed to darkflow's TFNet. Key meanings below follow darkflow's
# documented command-line flags of the same names.
options2 = dict(
    gpu=1.0,                                   # GPU fraction to use (0.0 - 1.0)
    model='cfg/yolov2-tiny-voc-hand.cfg',      # network definition file
    load=79440,                                # checkpoint step to restore
    threshold=0.1,                             # detection confidence threshold
    # NOTE(review): absolute Windows path - machine specific; anyone else
    # running this notebook has to point it at their own checkpoint folder.
    backup='E:/ar_fps/object_detection/ckpt',
)

# Build the network graph, then restore the weights from the checkpoint.
tfnet2 = TFNet(options2)
tfnet2.load_from_ckpt()

Parsing cfg/yolov2-tiny-voc-hand.cfg
Loading None ...
Finished in 0.0s

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
Instructions for updating:
Colocations handled automatically by placer.
       |        | input                            | (?, 416, 416, 3)
 Init  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 416, 416, 16)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 208, 208, 16)
 Init  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 208, 208, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 104, 104, 32)
 Init  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 104, 104, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 52, 52, 64)
 Init  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 52, 52, 128)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 26, 26, 128)
 Init  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 26, 26, 256)
 L

In [3]:
def read_xml(xml_file):
    """Read bounding boxes and class names from a Pascal-VOC-style XML file.

    Every ``<object>`` element directly under the document root contributes
    one box (from its ``<bndbox>`` child) and one class name (the text of its
    ``<name>`` child).

    Args:
        xml_file: Path or file object accepted by ``ElementTree.parse``.

    Returns:
        A ``(boxes, classes)`` tuple of two parallel lists. Each box is
        ``[xmin, ymin, xmax, ymax]`` as ints; each class is the raw text of
        the ``<name>`` tag.

    Raises:
        AttributeError: If an object lacks ``<bndbox>``, ``<name>`` or one of
            the four coordinate tags.
        ValueError: If a coordinate's text cannot be parsed as a number.
    """
    def _coord(bndbox, tag):
        # Some annotation tools write coordinates as floats ("48.0"), which
        # plain int() rejects. Try int first so integer text is parsed
        # exactly as before, then fall back to float and truncate.
        text = bndbox.find(tag).text
        try:
            return int(text)
        except ValueError:
            return int(float(text))

    boxes = []
    classes = []
    for obj in ET.parse(xml_file).getroot().findall('object'):
        bndbox = obj.find('bndbox')
        boxes.append([_coord(bndbox, tag) for tag in ('xmin', 'ymin', 'xmax', 'ymax')])
        classes.append(obj.find('name').text)
    return boxes, classes

In [4]:
# Run the hand detector over every image in `direc`, draw the detections,
# show each frame in an OpenCV window and report detection rate and timing.

# --- Configuration ----------------------------------------------------------
direc = os.path.join('new_data', 'egohands', 'train')
annot_direc = direc + '_annotations'
# os.listdir() returns entries in arbitrary order; sort so that runs are
# reproducible and frames are shown in a stable sequence.
image_names = sorted(os.listdir(direc))

# Set to True to also draw the annotated ground-truth boxes (in red) read
# from the matching XML file in `annot_direc`.
SHOW_GROUND_TRUTH = False

# --- State accumulated over the run -----------------------------------------
imgs = []             # every annotated frame (BGR), in processing order
num_detected = 0      # frames with at least one detection
num_processed = 0     # frames actually read and run through the model

orig_height = 416
orig_width = 416

model_time = 0            # seconds spent inside tfnet2.return_predict
total_time = 0            # seconds per frame including the imshow call
total_no_write_time = 0   # seconds per frame up to, but excluding, imshow

img = None  # last processed frame; stays None when nothing could be read

try:
    for img_name in image_names:
        start_time = time.perf_counter()
        frame = cv2.imread(os.path.join(direc, img_name))
        if frame is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None rather than raising, so skip it explicitly.
            print('Skipping unreadable file: {}'.format(img_name))
            continue

        # NOTE(review): darkflow's own examples pass the BGR array from
        # cv2.imread straight to return_predict - confirm that converting to
        # RGB first matches how this model was trained.
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        model_start_time = time.perf_counter()
        results = tfnet2.return_predict(img)
        model_time += time.perf_counter() - model_start_time

        if len(results) != 0:
            num_detected += 1
            for res in results:
                top_left = (res['topleft']['x'], res['topleft']['y'])
                bottom_right = (res['bottomright']['x'], res['bottomright']['y'])
                label = res['label']
                confidence = res['confidence']
                # Confidence text goes 30 px below the label's anchor point.
                top_left_conf = (res['topleft']['x'], res['topleft']['y'] + 30)
                img = cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 7)
                img = cv2.putText(img, label, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 1)
                img = cv2.putText(img, str(confidence), top_left_conf, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 1)

        if SHOW_GROUND_TRUTH:
            # splitext (rather than slicing off three characters) also copes
            # with extensions that are not exactly three letters long.
            annot = os.path.join(annot_direc, os.path.splitext(img_name)[0] + '.xml')
            if os.path.exists(annot):
                boxes, classes = read_xml(annot)
                for box in boxes:
                    img = cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 7)

        # Back to BGR, which is what cv2.imshow expects.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        imgs.append(img)
        total_no_write_time += time.perf_counter() - start_time

        cv2.imshow('img', img)
        total_time += time.perf_counter() - start_time
        num_processed += 1

        # Give the window 30 ms to repaint; 'q' aborts the run early.
        k = cv2.waitKey(30) & 0xFF
        if k == ord('q'):
            break
finally:
    # Close the display window even if the loop raised.
    cv2.destroyAllWindows()

# --- Summary ----------------------------------------------------------------
num_images = len(image_names)
if num_processed == 0:
    # Without this guard the averages below would divide by zero.
    print('No images were processed from {}'.format(direc))
else:
    height, width, channels = img.shape
    print('Image resolution: {}x{}'.format(height, width))
    print('Num images found: {} out of {} => {}'.format(num_detected, num_processed, num_detected / num_processed))
    print('Total time: {}'.format(total_time))
    avg_model_time = model_time / num_processed
    print('Avg model time: {} or {} fps'.format(avg_model_time, 1 / avg_model_time))
    avg_no_write_time = total_no_write_time / num_processed
    print('Avg no write time: {} or {} fps'.format(avg_no_write_time, 1 / avg_no_write_time))
    avg_time = total_time / num_processed
    print('Avg time: {} or {} fps'.format(avg_time, 1 / avg_time))

Image resolution: 480x640
Num images found: 371 out of 433 => 0.8568129330254042
Total time: 30.420039653778076
Avg model time: 0.06747898273600167 or 14.819429094127049 fps
Avg no write time: 0.06992426700459892 or 14.301186738707322 fps
Avg time: 0.07025413314960295 or 14.23403798706826 fps


In [5]:
# Scratch check: does a path named 'a' exist relative to the working directory?
a_exists = os.path.exists('a')
print(a_exists)
# plotImages(imgs)

False
