## Person and Vehicle Counter using OpenCV and YOLOv3 Tiny

### Import Libraries

In [1]:
#For command line use of YOLOv3 (required)
#sys.argv is cut down to its first element before FLAGS(sys.argv) runs, so the
#absl flag parser only ever sees argv[0]
#(presumably to keep the notebook kernel's own arguments away from absl -- TODO confirm)
from absl import flags
import sys
FLAGS = flags.FLAGS
sys.argv = sys.argv[:1]
FLAGS(sys.argv)

import os
#device selection via environment variable; it is set here, ahead of the
#tensorflow import further down in this cell
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' #use CPU=-1, GPU=0

import time #for calculating FPS
import numpy as np
import cv2 #OpenCV
import matplotlib.pyplot as plt

import tensorflow as tf
#under yolov3_tf2 folder
from yolov3_tf2.models import YoloV3Tiny
from yolov3_tf2.dataset import transform_images #for data augmentation
from yolov3_tf2.utils import convert_boxes #converts bboxes to deepsort format

#under deep_sort folder
from deep_sort import preprocessing #for max suppressions
from deep_sort import nn_matching #for setting up the association metrics
from deep_sort.detection import Detection #for object detection
from deep_sort.tracker import Tracker #for object tracking information
from tools import generate_detections as gdet #feature generation encoder

### Load YOLOv3 Model

In [2]:
#define classes: one label per line of coco.names, surrounding whitespace stripped
#the with-block closes the file handle as soon as the labels have been read
with open('./data/labels/coco.names') as names_file:
    class_names = [line.strip() for line in names_file]

#define allowed classes:
#index 0 is treated as the person class and the remaining entries as vehicles
#by the counting logic in the tracking cell
allowed_classes = ['person', 'bicycle', 'car', 'motorbike', 'bus', 'truck']

#load model: the network is built for as many classes as there are labels,
#then the pretrained weights are restored from disk
yolo = YoloV3Tiny(classes=len(class_names))
yolo.load_weights('./weights/yolov3-tiny.tf')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x149734dcaf0>

### Initialize DeepSORT

In [3]:
#--- DeepSORT tuning parameters ---
#cosine-distance threshold used to decide whether objects in different frames are the same
max_cosine_distance = 0.5
#size of the per-track feature gallery; None is passed through to the metric unchanged
nn_budget = None
#overlap threshold handed to non-max suppression to avoid too many detections on one object
nms_max_overlap = 0.8

#--- appearance feature encoder ---
#pretrained CNN for pedestrian tracking
model_filename = './model_data/mars-small128.pb'
#callable that generates one feature vector per bounding box
encoder = gdet.create_box_encoder(model_filename, batch_size=1)

#--- association metric and tracker ---
metric = nn_matching.NearestNeighborDistanceMetric(
    'cosine',
    max_cosine_distance,
    nn_budget,
)
tracker = Tracker(metric)

### Track Using Video

In [4]:
#Reads the test video frame by frame, runs YOLOv3-tiny detection and DeepSORT
#tracking on each frame, draws boxes / trails / a horizontal detection zone,
#counts persons and vehicles whose box center lies inside the zone, shows the
#annotated frame in a window and writes it to an output video.
#Raises IOError when the input video cannot be opened.

#deque gives a bounded trail per track; defaultdict creates the trail on first use
from collections import defaultdict, deque

vid = cv2.VideoCapture('./data/video/test.mp4')
if not vid.isOpened(): #otherwise the loop below would print 'Completed!' without processing anything
    raise IOError('Could not open video source: ./data/video/test.mp4')

codec = cv2.VideoWriter_fourcc(*'XVID') #constructs the fourcc code of the codec for the VideoWriter constructor
vid_fps = int(vid.get(cv2.CAP_PROP_FPS)) #fps of original video CAP_PROP_FPS returns float
vid_width, vid_height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
output_vid = cv2.VideoWriter('./data/video/results-tiny.avi', codec, vid_fps, (vid_width,vid_height)) #saves results to an output video

#historical trajectory: track ID -> last 30 box centers
#keyed by ID (not a fixed-size list) so track IDs of any size are accepted
points = defaultdict(lambda: deque(maxlen=30))

#for counting: IDs of every track that has been seen inside the zone (each ID stored once)
person_counter = []
vehicle_counter = []

#color palette, built once because it does not depend on the frame
cmap = plt.get_cmap('tab20b') #generate color maps
colors = [cmap(i)[:3] for i in np.linspace(0,1,20)] #generate 20 steps colors

try:
    #while loop for capturing all the frames in the video
    while True:
        _, frame = vid.read()

        if frame is None: #if reaches the end of the video and there is no more image
            print('Completed!')
            break

        #preprocessing for YOLOv3 Input
        frame_input = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) #video captured by OpenCV is in BGR format; tensorflow is RGB
        frame_input = tf.expand_dims(frame_input, 0) #adds a leading batch axis: H,W,C -> N,H,W,C
        frame_input = transform_images(frame_input, 416) #network input size is 416

        t1 = time.time() #start the timer

        bboxes, scores, classes, nums = yolo.predict(frame_input)

        #maximum of 100 bboxes per image
        #bboxes: 3D shape (1, 100, 4); 100 max bboxes with 4 coordinates each
        #        (convert_boxes below turns them into the format DeepSORT expects)
        #scores: 2D shape (1, 100); detected objects' confidence scores
        #classes: 2D shape(1, 100); detected objects' classes
        #nums: 1D shape (1); the total number of detected objects
        #these variables are important for DeepSORT

        classes = classes[0]
        names = np.array([class_names[int(c)] for c in classes]) #format for Non-Maximum Suppression (NMS)
        converted_bboxes = convert_boxes(frame, bboxes[0]) #converts boxes into list
        features = encoder(frame, converted_bboxes) #generate the feature spectra of the detected object

        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature
                      in zip(converted_bboxes, scores[0], names, features)]

        #perform non-max suppression to eliminate multiple frames on one target
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) #indices of the detections to keep
        detections = [detections[i] for i in indices] #removes redundancies

        #detections can now be used for DeepSORT since NMS was used to eliminate duplication of the same target
        tracker.predict() #uses Kalman filtering
        tracker.update(detections) #updates the Kalman tracker parameters and filter

        #detection zone: a horizontal band around the vertical middle of the frame,
        #height/20 above and below it
        height, width, _ = frame.shape
        zone_bottom = int(3*height/6 + height/20)
        zone_top = int(3*height/6 - height/20)

        person_current_count = 0 #persons currently inside the zone
        vehicle_current_count = 0 #vehicles currently inside the zone

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1: #skip unconfirmed tracks and tracks not updated recently
                continue

            bbox = track.to_tlbr() #for OpenCV output minX, minY, maxX, maxY
            class_name = track.get_class() #get the corresponding classes
            track_id = int(track.track_id)
            label_id = str(track.track_id)
            color = colors[track_id % len(colors)] #assigning the color code
            color = [i * 255 for i in color] #color originally ranges from 0 to 1; thus must be converted from 0 to 255

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) #bounding box rectangle
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)),
                          (int(bbox[0])+(len(class_name)+len(label_id))*17, int(bbox[1])), color, -1) #rectangle for text
            cv2.putText(frame, class_name + " - " + label_id, (int(bbox[0]), int(bbox[1]-10)),
                        0, 0.75, (255,255,255), 2) #display text for class name and Tracking ID

            center = (int(((bbox[0]) + (bbox[2]))/2), int(((bbox[1]) + (bbox[3]))/2)) #get center coordinates of bounding box
            trail = points[track_id]
            trail.append(center)

            #for historical trajectory: connect consecutive centers
            for j in range(1, len(trail)):
                thickness = int(np.sqrt(64/float(j+1))*2) #thickness shrinks as j grows, so the newest segments are the thinnest
                cv2.line(frame, trail[j-1], trail[j], color, thickness)

            #for counter in zone: a track counts when its box center lies inside the band
            if zone_top <= center[1] <= zone_bottom:
                if class_name == allowed_classes[0]: #if detected class is a person
                    if track_id not in person_counter: #store each ID once so the list does not grow every frame
                        person_counter.append(track_id)
                    person_current_count += 1
                elif class_name in allowed_classes[1:]: #if detected class is a vehicle
                    if track_id not in vehicle_counter:
                        vehicle_counter.append(track_id)
                    vehicle_current_count += 1

        #zone borders, drawn once per frame so they are visible even when nothing is tracked
        cv2.line(frame, (0, zone_bottom), (width, zone_bottom), (0,255,0), thickness=2)
        cv2.line(frame, (0, zone_top), (width, zone_top), (0,255,0), thickness=2)

        #display persons count
        person_total_count = len(set(person_counter))
        cv2.putText(frame, "Current Persons in Detection Zone: " + str(person_current_count), (0,80), 0, 1, (0,255,0),2)
        cv2.putText(frame, "Total Persons Count: " + str(person_total_count), (0,180), 0, 1, (0,255,0),2)

        #display vehicle count
        vehicle_total_count = len(set(vehicle_counter))
        cv2.putText(frame, "Current Vehicles in Detection Zone: " + str(vehicle_current_count), (0,130), 0, 1, (0,255,0),2)
        cv2.putText(frame, "Total Vehicle Count: " + str(vehicle_total_count), (0,230), 0, 1, (0,255,0),2)

        #display FPS (measured from just before detection to this point)
        elapsed = time.time() - t1
        fps = 1./elapsed if elapsed > 0 else 0.0 #guard against a zero reading from a coarse clock
        cv2.putText(frame, "FPS: {:.2f}".format(fps), (0,30), 0, 1, (0,0,255), 2)

        cv2.imshow('output', frame)

        output_vid.write(frame)

        if cv2.waitKey(1) & 0xFF == 27: #press ESC to quit video
            break
finally:
    #always release the capture, finalize the output file and close the window,
    #even when the loop is interrupted or raises
    vid.release()
    output_vid.release()
    cv2.destroyAllWindows()