In [None]:
import imutils
import time
import cv2
import os
import colorsys
import glob
import numpy as np

from yolo.model import eval
from yolo.utils import letterbox_image

from collections import deque, defaultdict

from keras.models import load_model
from keras import backend as K

from PIL import ImageDraw, Image
from sklearn.utils.linear_assignment_ import linear_assignment

from numpy import dot


In [None]:
def box_iou(a, b):
    '''
    Helper function to calculate the ratio between the intersection and the
    union (IoU) of two boxes a and b.
    a[0], a[1], a[2], a[3] <-> left, top, right, bottom

    Returns the IoU as a float. When the union has no area (for example both
    boxes are degenerate, zero-size rectangles) 0.0 is returned instead of
    dividing by zero.
    '''

    # Overlap extent along each axis; clamped at 0 when the boxes are disjoint
    w_intsec = np.maximum(0, (np.minimum(a[2], b[2]) - np.maximum(a[0], b[0])))
    h_intsec = np.maximum(0, (np.minimum(a[3], b[3]) - np.maximum(a[1], b[1])))
    s_intsec = w_intsec * h_intsec
    s_a = (a[2] - a[0])*(a[3] - a[1])
    s_b = (b[2] - b[0])*(b[3] - b[1])
    s_union = s_a + s_b - s_intsec

    # Nothing to compare against: treat as "no overlap" rather than NaN
    if s_union <= 0:
        return 0.0

    return float(s_intsec)/float(s_union)

def convert_to_pixel(box_yolo, img, crop_range):
    '''
    Helper function to convert (scaled) coordinates of a bounding box
    to pixel coordinates.

    box_yolo: object exposing the attributes x, y (box center) and w, h
        (box size), all expressed as fractions of the cropped region.
        Example values: (0.89361443264143803, 0.4880486045564924,
        0.23544462956491041, 0.36866588651069609)
    img: not used by the conversion; kept so existing callers keep working.
        (The previous implementation unpacked img.shape into three values
        without using them, which failed for 2-D / grayscale images.)
    crop_range: specifies the part of image to be cropped, as
        [[xmin, xmax], [ymin, ymax]] in pixels.

    Returns a numpy array [left, top, width, height] in pixels.
    '''
    xmin, xmax = crop_range[0]
    ymin, ymax = crop_range[1]
    crop_w = xmax - xmin
    crop_h = ymax - ymin

    # Top-left corner: center minus half the size, scaled to the crop and
    # shifted by the crop origin
    left = int((box_yolo.x - box_yolo.w/2.)*crop_w + xmin)
    top = int((box_yolo.y - box_yolo.h/2.)*crop_h + ymin)

    width = int(box_yolo.w*crop_w)
    height = int(box_yolo.h*crop_h)

    # Deal with corner cases: a box hanging off the top/left edge is pinned
    # to the edge (width and height are left untouched, as before)
    left = max(left, 0)
    top = max(top, 0)

    # Return the coordinates (in the unit of the pixels)
    return np.array([left, top, width, height])

def ltwh_to_ltrb(bbox, img_dim = (960, 720)):
    '''
    Convert a [left, top, width, height] box into the corner form
    [left, top, right, bottom] used by the cv2 drawing helpers.

    bbox: [left, top, width, height]
    img_dim: image size; img_dim[0] <-> x (width), img_dim[1] <-> y (height)

    left/top are clamped to be >= 0 and right/bottom to be <= the image
    size. All four values are truncated to int.
    '''
    x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3]
    max_x, max_y = img_dim[0], img_dim[1]

    corners = (
        np.maximum(0, x),
        np.maximum(0, y),
        # Far edges are measured from the original (unclamped) origin
        np.minimum(max_x, x + w),
        np.minimum(max_y, y + h),
    )
    return tuple(int(edge) for edge in corners)

def ltrb_to_ltwh(bbox, img_dim = (960, 720)):
    '''
    Helper function for converting a corner-form box into origin + size form.
    bbox = [left, top, right, bottom]
    returns (left, top, width, height)
    img_dim: dimension of the image, img_dim[0]<-> x
    img_dim[1]<-> y

    The box is first clipped to the image (left/top >= 0, right/bottom <=
    img_dim) and the size is then measured on the clipped box, so the result
    never extends past the original right/bottom edge or the image border.
    A box that ends up empty after clipping gets width/height 0.
    '''
    left = np.maximum(0, bbox[0])
    top = np.maximum(0, bbox[1])
    right = np.minimum(img_dim[0], bbox[2])
    bottom = np.minimum(img_dim[1], bbox[3])

    # Measure from the clamped origin; previously the unclamped origin was
    # used, which inflated the size whenever left/top had been clamped
    width = np.maximum(0, right - left)
    height = np.maximum(0, bottom - top)

    return (int(left), int(top), int(width), int(height))

def tlbr_to_ltrb(bbox, img_dim = (960, 720)):
    '''
    Reorder a [top, left, bottom, right] box into [left, top, right, bottom].

    bbox: [top, left, bottom, right]
    img_dim: image size; img_dim[0] <-> x (width), img_dim[1] <-> y (height)

    left/top are clamped to be >= 0 and right/bottom to be <= the image
    size. All four values are truncated to int.
    '''
    top, left, bottom, right = bbox[0], bbox[1], bbox[2], bbox[3]
    max_x, max_y = img_dim[0], img_dim[1]

    reordered = (
        np.maximum(0, left),
        np.maximum(0, top),
        np.minimum(max_x, right),
        np.minimum(max_y, bottom),
    )
    return tuple(int(edge) for edge in reordered)

def draw_box_label(img, bbox_cv2, box_color=(0, 255, 255), show_label=False):
    '''
    Draw a bounding box on img and, optionally, a label with its center.

    img: image passed straight to the cv2 drawing calls (drawn on in place)
    bbox_cv2 = [left, top, right, bottom]
    box_color: color tuple used for the outline and the label background
    show_label: when True, a filled strip is drawn above the box showing the
        x and y coordinates of the box center

    Returns img.
    '''
    left, top, right, bottom = (bbox_cv2[i] for i in range(4))

    # Outline of the bounding box (4 px thick)
    cv2.rectangle(img, (left, top), (right, bottom), box_color, 4)

    if not show_label:
        return img

    label_font = cv2.FONT_HERSHEY_SIMPLEX
    label_scale = 0.7
    label_color = (0, 0, 0)

    # Filled strip sitting on top of the box, used as the label background
    cv2.rectangle(img, (left-2, top-45), (right+2, top), box_color, -1, 1)

    # Two text rows: center x on the upper row, center y on the lower one
    rows = (('x=', left + right, 25), ('y=', top + bottom, 5))
    for prefix, edge_sum, lift in rows:
        cv2.putText(img, prefix + str(edge_sum / 2), (left, top - lift),
                    label_font, label_scale, label_color, 1, cv2.LINE_AA)

    return img

In [None]:
class YOLO(object):
    '''
    Wrapper around a Keras YOLO face-detection model.

    On construction it reads the class names and anchors from disk, loads the
    .h5 model, and builds the output tensors (boxes, scores, classes) that
    detect_image() later evaluates in the Keras backend session.
    '''

    def __init__(self):
        # Hard-coded locations of the model and its configuration files
        self.model_path = 'model-weights/YOLO_Face.h5'
        self.classes_path = 'cfg/face_classes.txt'
        self.anchors_path = 'cfg/yolo_anchors.txt'
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.boxes, self.scores, self.classes = self._generate()
        # Network input size; each side must be a multiple of 32
        # (checked in detect_image)
        self.model_image_size = (416, 416)

    def _get_class(self):
        '''Read the class names file (one name per line) into a list.'''
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = [line.strip() for line in f]
        print(class_names)
        return class_names

    def _get_anchors(self):
        '''
        Read the first line of the anchors file (comma-separated numbers) and
        return it as an array of shape (-1, 2), i.e. one row per anchor.
        '''
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape(-1, 2)

    def _generate(self):
        '''
        Load the Keras model and build the filtered detection tensors.

        Sets self.yolo_model, self.colors and self.input_image_shape.
        Returns the (boxes, scores, classes) tensors.

        Raises:
            AssertionError: if the model path is not a .h5 file, or the
                model's output size does not match the anchors/classes.
            RuntimeError: if the model file cannot be loaded; the original
                error is attached as the cause.
        '''
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file'

        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)

        # The previous fallback called self.yolo_model.load_weights() after a
        # failed load, but no model object exists at that point, so the real
        # error was hidden behind an AttributeError. Report the failure
        # directly instead.
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except Exception as exc:
            raise RuntimeError(
                'Could not load Keras model from {}'.format(model_path)
            ) from exc

        # Make sure model, anchors and classes match
        assert self.yolo_model.layers[-1].output_shape[-1] == \
               num_anchors / len(self.yolo_model.output) * (
                       num_classes + 5), \
            'Mismatch between model and given anchor and class sizes'

        print(
            '[i] ==> {} model, anchors, and classes loaded.'.format(model_path))

        # Generate colors for drawing bounding boxes: evenly spaced hues,
        # converted to 0-255 RGB tuples
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

        # Shuffle colors to decorrelate adjacent classes. A fixed seed keeps
        # the colors stable between runs; the seed is reset afterwards.
        np.random.seed(102)
        np.random.shuffle(self.colors)
        np.random.seed(None)

        # Generate output tensor targets for filtered bounding boxes.
        self.input_image_shape = K.placeholder(shape=(2,))
        boxes, scores, classes = eval(self.yolo_model.output, self.anchors,
                                           len(self.class_names),
                                           self.input_image_shape,
                                           score_threshold=0.5,
                                           iou_threshold=0.45)
        return boxes, scores, classes

    def detect_image(self, image):
        '''
        Run the detector on a single image.

        image: PIL image (its .size, .width and .height are read).

        Returns (image, boxes). image is returned as given; nothing is drawn
        on it here. boxes is a list with one tuple per detection, in the
        order the model emits them (unpacked below as top, left, bottom,
        right), without rounding or clipping.
        '''
        start_time = time.time()

        if self.model_image_size != (None, None):
            assert self.model_image_size[
                       0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[
                       1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image, tuple(
                reversed(self.model_image_size)))
        else:
            # No fixed input size: round the image size down to a multiple
            # of 32
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        print(image_data.shape)
        image_data /= 255.
        # Add batch dimension
        image_data = np.expand_dims(image_data, 0)

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                # (height, width) of the original image
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        print('[i] ==> Found {} face(s) for this image'.format(len(out_boxes)))

        # Log every detection with rounded coordinates clipped to the image.
        # These rounded values are only printed; the raw boxes are returned.
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            text = '{} {:.2f}'.format(predicted_class, score)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            print(text, (left, top), (right, bottom))

        end_time = time.time()
        print('[i] ==> Processing time: {:.2f}ms'.format((end_time -
                                                          start_time) * 1000))
        return image, list(map(tuple,out_boxes))

    def close_session(self):
        '''Close the backend session obtained in __init__.'''
        self.sess.close()

In [None]:
class Tracker():
    '''
    One tracked object: an OpenCV CSRT tracker plus bookkeeping fields
    (id, last known box, hit / loss counters).
    '''

    def __init__(self):
        # Underlying OpenCV tracker; it is not usable until init() is called
        self.tracker = cv2.TrackerCSRT_create()
        self.is_init = False

        # Bookkeeping (history) for this track
        self.id = 0            # tracker's id
        self.box = (0,0,0,0)   # coordinates of the current bounding box
        self.hits = 0          # number of detection matches
        self.no_losses = 0     # number of unmatched tracks (track loss)

    def update(self, frame):
        '''Advance the tracker on frame; returns the (first, second) pair
        produced by the underlying tracker's update().'''
        ok, bbox = self.tracker.update(frame)
        return (ok, bbox)

    def init(self, image, box):
        '''Start tracking box in image and mark the tracker as initialised.'''
        self.tracker.init(image, box)
        self.is_init = True

    def release(self):
        '''Clear the underlying tracker.'''
        self.tracker.clear()

In [None]:
def assign_detections_to_trackers(trackers, detections, iou_thrd = 0.3):
    '''
    From the current list of tracker boxes and new detection boxes, compute
    the best one-to-one pairing by IoU.

    trackers, detections: sequences of [left, top, right, bottom] boxes.
    iou_thrd: pairs with an IoU below this value are not accepted as matches.

    Returns (matches, unmatched_detections, unmatched_trackers), in that
    order:
      matches: int array of shape (k, 2), rows are [tracker_idx, detection_idx]
      unmatched_detections: list of detection indices without a tracker
      unmatched_trackers: list of tracker indices without a detection
    '''
    # Local import: scipy's solver replaces sklearn.utils.linear_assignment_,
    # which was removed from scikit-learn in release 0.23.
    from scipy.optimize import linear_sum_assignment

    IOU_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    for t, trk in enumerate(trackers):
        for d, det in enumerate(detections):
            IOU_mat[t, d] = box_iou(trk, det)

    # A NaN IoU (two degenerate boxes) would be rejected by the solver;
    # treat it as "no overlap".
    IOU_mat = np.nan_to_num(IOU_mat)

    # Solve the "maximize the sum of IoU" assignment problem (Hungarian /
    # Munkres style); the solver minimizes cost, hence the negation.
    if IOU_mat.size > 0:
        rows, cols = linear_sum_assignment(-IOU_mat)
    else:
        # No trackers or no detections: nothing can be paired
        rows, cols = [], []

    paired_trks = set(int(r) for r in rows)
    paired_dets = set(int(c) for c in cols)
    unmatched_trackers = [t for t in range(len(trackers))
                          if t not in paired_trks]
    unmatched_detections = [d for d in range(len(detections))
                            if d not in paired_dets]

    # For creating trackers we consider any detection with an overlap less
    # than iou_thrd to signify the existence of an untracked object, so a
    # weak pairing is split back into one unmatched tracker and one
    # unmatched detection.
    matches = []
    for t, d in zip(rows, cols):
        t, d = int(t), int(d)
        if IOU_mat[t, d] < iou_thrd:
            unmatched_trackers.append(t)
            unmatched_detections.append(d)
        else:
            matches.append((t, d))

    # Always a (k, 2) int array, including k == 0
    matches = np.array(matches, dtype=int).reshape(-1, 2)

    return matches, unmatched_detections, unmatched_trackers

In [None]:
def pipeline(img, detect_every=10):
    '''
    Pipeline function for detection and tracking; processes one video frame.

    img: the frame as a PIL image (it is handed to model.detect_image on
        detection frames and converted to a numpy array for tracking and
        drawing).
    detect_every: run the detector on every N-th frame (default 10, the value
        that used to be hard-coded). On all other frames the existing
        trackers are only updated.

    Reads and updates the module-level state tracker_list, track_id_list,
    people_count and frame_count; reads model and num_trackers.

    Returns the frame as a numpy array with the boxes drawn on it.
    '''
    global tracker_list
    global track_id_list
    global model
    global people_count
    global frame_count
    global num_trackers

    run_detection = (frame_count % detect_every == 0)

    if run_detection:
        # The detector works on the PIL image, so run it before converting
        _, raw_boxes = model.detect_image(img)

    # np.array (rather than np.asarray) yields a writable copy; an array
    # viewed from a PIL image can be read-only, which the cv2 drawing calls
    # may reject.
    frame = np.array(img)

    # Actual frame size (x, y). It is passed to every box conversion so boxes
    # are clamped to this frame instead of the helpers' 960x720 default.
    img_dim = (frame.shape[1], frame.shape[0])

    t_box = []

    if run_detection:
        d_box = [tlbr_to_ltrb(box, img_dim) for box in raw_boxes]

        # Update every tracker. The success flag is ignored on purpose so
        # that t_box stays index-aligned with tracker_list.
        for trk in tracker_list:
            (_, xx) = trk.update(frame)
            trk.box = ltwh_to_ltrb(xx, img_dim)
            t_box.append(trk.box)

        matched, unmatched_dets, unmatched_trks \
            = assign_detections_to_trackers(t_box, d_box, iou_thrd = 0.3)

        # Highest index first, so deleting by index below does not shift the
        # positions of the entries still to be removed
        unmatched_trks.sort(reverse = True)

        print('Detection: ', d_box)
        print('t_box: ', t_box)
        print('matched:', matched)
        print('unmatched_det:', unmatched_dets)
        print('unmatched_trks:', unmatched_trks)
        print('people count:', people_count)
        print('frame count:', frame_count)

        # Deal with matched detections
        for trk_idx, det_idx in matched:
            draw_box_label(frame, tracker_list[trk_idx].box, box_color=(255, 0, 0))

        # Deal with unmatched detections: start a new tracker while there is
        # capacity. The capacity check comes first so no tracker is built and
        # initialised only to be thrown away.
        for idx in unmatched_dets:
            z = d_box[idx]
            if len(tracker_list) < num_trackers:
                new_trk = Tracker()
                new_trk.init(frame, ltrb_to_ltwh(z, img_dim))
                new_trk.box = z
                new_trk.id = track_id_list.popleft() # assign an ID for the tracker
                tracker_list.append(new_trk)
            draw_box_label(frame, z, box_color=(0, 255, 0))

        # Deal with unmatched tracks: draw them one last time, hand their ID
        # back to the pool and drop them. New trackers were appended at the
        # end of the list, so these indices are still valid.
        for trk_idx in unmatched_trks:
            lost_trk = tracker_list[trk_idx]
            draw_box_label(frame, lost_trk.box, box_color=(0, 0, 255))
            track_id_list.append(lost_trk.id)
            del tracker_list[trk_idx]
            people_count += 1

    else:
        # Tracking-only frame: draw the trackers that report success
        for trk in tracker_list:
            (success, xx) = trk.update(frame)
            if success:
                xx = ltwh_to_ltrb(xx, img_dim)
                draw_box_label(frame, xx, box_color=(0, 255, 255))
                trk.box = xx
                t_box.append(trk.box)

        print('t_box: ', t_box)
        print('people count:', people_count)
        print('frame count:', frame_count)

    print('Ending tracker_list: ',len(tracker_list))
    frame_count += 1
    return frame

In [None]:
# Module-level state shared with pipeline(), which declares these names
# as globals
frame_count = 0   # number of frames processed so far
people_count = 0  # incremented by pipeline() each time a track is dropped

tracker_list = []                     # currently active Tracker objects
track_id_list = deque('ABCDEFGHIJ')   # pool of free single-letter track IDs
num_trackers = len(track_id_list)     # capacity: one tracker per ID

model = YOLO()

In [None]:
# Input video
filename = 'pres.mp4'
inputs = 'inputs/'
vid = cv2.VideoCapture(os.path.join(inputs, filename))

# Output video: written to <output>/<input file name>.avi
output = 'outputs/'
output_fn = ('{}.avi'.format(filename))

if not vid.isOpened():
    raise IOError("Couldn't open video")

video_fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')

# Reuse the frame rate and frame size of the input for the output
video_fps = vid.get(cv2.CAP_PROP_FPS)

video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))

# An empty output folder name disables writing
isOutput = output != ""

if isOutput:
    # The writer does not create missing folders; make sure the folder exists
    # so frames are not silently dropped.
    os.makedirs(output, exist_ok=True)
    out = cv2.VideoWriter(os.path.join(output, output_fn), video_fourcc, video_fps, video_size)
    if not out.isOpened():
        raise IOError("Couldn't open output video for writing")

In [None]:
# Process the video frame by frame. The try/finally makes sure the capture
# and the writer are released (so the output file is finalised) even when
# pipeline() raises part-way through.
try:
    while True:
        start = time.time()
        ret, frame = vid.read()
        if not ret:
            # End of stream (or read error)
            break

        # NOTE(review): cv2 delivers frames in BGR channel order and the
        # array is wrapped as-is, so the detector sees BGR data. Confirm
        # this matches what the model expects.
        image = Image.fromarray(frame)

        result = pipeline(image)

        if isOutput:
            print('Printing Out', time.time()-start)
            print()
            out.write(result)
finally:
    vid.release()
    # `out` only exists when writing is enabled
    if isOutput:
        out.release()
    cv2.destroyAllWindows()