## Person and Vehicle Counter/License Plate Detector Using OpenCV and Pre-trained Intel Model by OpenVINO

### NOTE: Work in Progress (currently includes Object Detection and Tracking)
- Model reference: https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/intel/person-vehicle-bike-detection-crossroad-0078

### Import Libraries

In [1]:
import collections 
import os
import sys
import time

import pandas as pd

import cv2
import numpy as np
from IPython import display
from openvino.runtime import Core

#for object tracking
from deep_sort import preprocessing #for max suppressions
from deep_sort import nn_matching #for setting up the association metrics
from deep_sort.detection import Detection #for object detection
from deep_sort.tracker import Tracker #for object tracking information
from tools import generate_detections as gdet #feature generation encoder

import imutils
from imutils.video import VideoStream, FPS

### Load the Detection Model

In [2]:
THRESH = 0.1

# model_path = "c:/Users/Lenard/intel/person-vehicle-bike-detection-crossroad-0078/FP16/person-vehicle-bike-detection-crossroad-0078.xml"

# #initialize inference engine
# ie = Core()
# #read the network and corresponding weights from file
# detector = ie.read_model(model=model_path)

# #compile the model for the CPU (you may also use GPU, MYRIAD, etc.)
# #or let the engine choose the available device (AUTO)
# compiled_model = ie.compile_model(model=detector, device_name="GPU")

# #get input and output nodes
# input_layer = compiled_model.input(0)
# output_layer = compiled_model.output(0)

# #get input size
# height, width = list(input_layer.shape)[2:]
# print("Height: {}, Width: {}".format(height,width))
# print(input_layer.any_name, output_layer.any_name)

In [3]:
# License Plate Detection
lpd_model_path = "c:/Users/Lenard/intel/vehicle-license-plate-detection-barrier-0106/FP16/vehicle-license-plate-detection-barrier-0106.xml"

#initialize inference engine
ie = Core()
#read the network and corresponding weights from file
plate_detector = ie.read_model(model=lpd_model_path)

#compile the model for the CPU (you may also use GPU, MYRIAD, etc.)
#or let the engine choose the available device (AUTO)
compiled_plate_d_model = ie.compile_model(model=plate_detector, device_name="GPU")

#get input and output nodes
plate_d_input_layer = compiled_plate_d_model.input(0)
plate_d_output_layer = compiled_plate_d_model.output(0)

#get input size
print(plate_d_input_layer.shape)
plate_d_height, plate_d_width = list(plate_d_input_layer.shape)[1:3]
print("Height: {}, Width: {}".format(plate_d_height, plate_d_width))

{1, 300, 300, 3}
Height: 300, Width: 300


### Load DeepSORT Tracker

In [4]:
max_cosine_distance = 0.4 #used to determine if objects between frames are the same
nn_budget = None #used to form a gallery for storing of features
nms_max_overlap = 1.0 #used to avoid too many detections on the same object

model_filename = './model_data/mars-small128.pb' #pretrained CNN for pedestrian tracking
encoder = gdet.create_box_encoder(model_filename, batch_size=8) #feature generations

metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget) #for measuring associations
tracker = Tracker(metric)

### Processing Results
- List available classes and create corresponding colors. 
- In post-processing, boxes are transformed via normalization. 
- NMS will also be used to avoid overlapping detections and those that do not meet the threshold.
- Finally, boxes and labels will be drawn

In [5]:
#classes of person-vehicle-bike-detection-crossroad-0078
classes = ["person", "vehicle", "bike"]

#map corresponding colors to classes
colors = cv2.applyColorMap(
    src = np.arange(0, 255, 255 / len(classes), dtype=np.float32).astype(np.uint8),
    colormap = cv2.COLORMAP_RAINBOW,
).squeeze()

In [6]:
# classes of license plate detection
plate_d_classes = ["vehicle", "license plate"]

#map corresponding colors to classes
plate_d_colors = cv2.applyColorMap(
    src = np.arange(0, 255, 255 / len(classes), dtype=np.float32).astype(np.uint8),
    colormap = cv2.COLORMAP_RAINBOW,
).squeeze()

In [7]:
def process_results(frame, results, thresh=0.6):
    # size of the original frame
    h, w = frame.shape[:2]
    results = results.squeeze()
    boxes = []
    labels = []
    scores = []
    ids = []
    new_rects = []
    for idx, label, score, xmin, ymin, xmax, ymax in results:
        # create a box using pixel coordinates
        boxes.append(
            tuple(map(int, (xmin*w, ymin*h, (xmax-xmin)*w, (ymax-ymin)*h)))
        )
        labels.append(int(label))
        scores.append(float(score))
        ids.append(int(idx))
        
    # apply NMS (Non-Maximum Suppression) to eliminate overlapping entities
    # this algorithm returns indices of objects to keep
    indices = cv2.dnn.NMSBoxes(
        bboxes=boxes, scores=scores, score_threshold=thresh, nms_threshold=0.6
    )
    
    #if there are no boxes
    if len(indices) == 0:
        return []
        
    #filter detected objects
    return [(labels[i], scores[i], boxes[i]) for i in indices.flatten()]


def draw_boxes(detection_type, frame, boxes):
    if detection_type == "PVB":
        for label, score, box in boxes:
            label = label-1
            color = tuple(map(int, colors[label]))
            #draw box
            x2 = box[0] + box[2]
            y2 = box[1] + box[3]
            cv2.rectangle(frame, box[:2], (x2,y2), color=color, thickness=3)

            #draw label
            cv2.putText(
                frame,
                f"{classes[label]} {score:.2f}",
                (box[0]+10, box[1]+30),
                cv2.FONT_HERSHEY_COMPLEX,
                frame.shape[1]/1000,
                color,
                1,
                cv2.LINE_AA
            )
        return frame
    elif detection_type == "LP":
        for label, score, box in boxes:
            label = label-1
            color = tuple(map(int, plate_d_colors[label]))
            #draw box
            x2 = box[0] + box[2]
            y2 = box[1] + box[3]
            cv2.rectangle(frame, box[:2], (x2,y2), color=color, thickness=3)

            #draw label
            cv2.putText(
                frame,
                f"{plate_d_classes[label]} {score:.2f}",
                (box[0]+10, box[1]+30),
                cv2.FONT_HERSHEY_COMPLEX,
                frame.shape[1]/1000,
                color,
                1,
                cv2.LINE_AA
            )
        return frame

In [8]:
def detect(source=0, flip=False, use_popup=False, skip_first_frames=0):
    vid = cv2.VideoCapture(source)

    #give camera time to warm up
    time.sleep(0.1)
    
    processing_times = collections.deque()
    
    #list for historical trajectory
    from collections import deque
    points = [deque(maxlen=30) for _ in range(1000)]
    time_points = [deque(maxlen=30) for _ in range(1000)]

    frame_count = 0
    
    #for counting totals
    person_counter = []
    vehicle_counter = []

    #for counting in hours
    person_counter_hour = []
    vehicle_counter_hour = []

    #for storing the count of the previous hours
    prev_hours_person_count = 0
    prev_hours_vehicle_count = 0

    #dictionary for count data
    #define dictionary of data
    count_dict = {'Total Persons': [0], 'Total Vehicles': [0], 'Day': [0], 'Date': [0], 'Time': [0]}
    
    #begin video capturing
    while (vid.isOpened()):
        #capture frame
        ret, frame = vid.read()
        
        if frame is None:
            print('Completed!')
            break
        
        #if frame is larger than full HD, reduce size to improve performance
        scale = 1280 / max(frame.shape)
        if scale < 1:
            frame = cv2.resize(
                src=frame,
                dsize=None,
                fx=scale,
                fy=scale,
                interpolation=cv2.INTER_AREA
            )
        
        
        # resize image to fit neural network input for person, vehicle, and bike detection
#         input_frame = cv2.resize(
#             frame, dsize=(width,height), interpolation=cv2.INTER_AREA
#         )
#         frame_rgb = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
#         #create batch of images
#         input_frame = np.expand_dims(input_frame.transpose(2,0,1), axis=0)
        
        # resize image to fit neural network input for plate number detection
        plate_d_input_frame = cv2.resize(
            frame, dsize=(plate_d_width,plate_d_height), interpolation=cv2.INTER_AREA
        )
        plate_d_frame_rgb = cv2.cvtColor(plate_d_input_frame, cv2.COLOR_BGR2RGB)
        #create batch of images
        plate_d_input_frame = np.expand_dims(plate_d_input_frame, axis=0)
        
                   
        #measure processing time
        t1 = time.time()
        #get detections
        #results = compiled_model([input_frame])[output_layer]
        plate_d_results = compiled_plate_d_model([plate_d_input_frame])[plate_d_output_layer]
        t2 = time.time()
        
        # get poses from person, vehicle, bike detection results
#         boxes = process_results(frame=frame, results=results, thresh=THRESH)
#         # draw bboxes
#         frame = draw_boxes(detection='PVB', frame=frame, boxes=boxes)
        
        #get poses from plate number detection results
        plate_d_boxes = process_results(frame=frame, results=plate_d_results, thresh=THRESH)
        # draw bboxes
        frame = draw_boxes(detection_type='LP', frame=frame, boxes=plate_d_boxes)

        # size of the original frame
        h, w = frame.shape[:2]
        
        #create lists for prediction results
        labels = []
        scores = []
        bboxes = []
        for label, score, box in plate_d_boxes:
            label = label-1
            labels.append(label)
            scores.append(score)
            bboxes.append(list(np.array(box)))
        
        names = []
        for i in range(len(labels)):
            names.append(classes[int(labels[i])])
        names = np.array(names)
        
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, name, feature) for bbox, score, name, feature
                     in zip(bboxes, scores, names, features)]
        
        #detections can now be used for DeepSORT since NMS was used to eliminate duplication of the same target
        tracker.predict() #uses Kalman filtering
        tracker.update(detections) #updates the Kalman tracker parameters and filter
        
        person_current_count = int(0) #detect current vehicle in specific zone
        vehicle_current_count = int(0) #detect current vehicles in specific zone
        
        #show tracked objects
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1: #if Kalman filtering was not able to assign a track
                continue
        
            bbox = track.to_tlbr() #for OpenCV output minX, minY, maxX, maxY
            class_name = track.get_class() #get the corresponding classes

            center = (int(((bbox[0]) + (bbox[2]))/2), int(((bbox[1]) + (bbox[3]))/2)) #get center coordinates of bounding box
            
            #display text and circle relating to the tracked object
            cv2.putText(
                frame, "ID {}".format(track.track_id),
                (center[0]-10, center[1]-10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2
            )
            cv2.circle(frame, (center[0],center[1]), 4, (0,255,0), -1)
            
            #for counting
            h, w, _ = frame.shape
            
            if center[1] <= h and center[1] >= 0 and center[0] >= 0 and center[0] <= w:
                if class_name == classes[0]: #if detected class is a person
                    if int(track.track_id) not in person_counter:
                        person_counter.append(int(track.track_id))
                    person_current_count += 1
                elif class_name in classes[1:]: #if detected class is a vehicle or bike
                    if int(track.track_id) not in vehicle_counter:
                        vehicle_counter.append(int(track.track_id))
                    vehicle_current_count += 1
            
        #display persons count
        person_total_count = len(set(person_counter))
        person_total_count_hour = len(set(person_counter)) - prev_hours_person_count
        cv2.putText(frame, "Current Persons in Frame: " + str(person_current_count), (0,130), 0, 1, (0,255,0),2)
        cv2.putText(frame, "Total Persons Detected This Hour: " + str(person_total_count_hour), (0,280), 0, 1, (0,255,0),2)
        cv2.putText(frame, "Total Persons Detected: " + str(person_total_count), (0,430), 0, 1, (0,255,0),2)

        #display vehicle count
        vehicle_total_count = len(set(vehicle_counter))
        vehicle_total_count_hour = len(set(vehicle_counter)) - prev_hours_vehicle_count
        cv2.putText(frame, "Current Vehicles in Frame: " + str(vehicle_current_count), (0,180), 0, 1, (0,255,0),2)
        cv2.putText(frame, "Total Vehicles Detected This Hour: " + str(vehicle_total_count_hour), (0,330), 0, 1, (0,255,0),2)
        cv2.putText(frame, "Total Vehicles Detected: " + str(vehicle_total_count), (0,480), 0, 1, (0,255,0),2)
    
        processing_times.append(t1 - t2)
        #use processing times from last 200 frames
        if len(processing_times) > 200:
            processing_times.popleft()
            
        _, f_width = frame.shape[:2]
        # get mean processing time [ms]
        processing_time = np.mean(processing_times) * 1000
        #get fps
        fps = 1000 / processing_time
        #display fps
        cv2.putText(
            frame,
            f"Inference Time: {processing_time:.1f}ms ({fps:.1f} FPS)",
            (20, 40),
            cv2.FONT_HERSHEY_COMPLEX,
            f_width / 1000,
            (0,0,255),
            1,
            cv2.LINE_AA
        )
        
        frame_count += 1
        
        #display date and time
        current_time = time.asctime( time.localtime(time.time()) )
        cv2.putText(frame, current_time, (0,80), 0, 1, (255,255,255), 2)

        #Save count every hour
        if ( (time.localtime(time.time()).tm_min % 1 == 0) and (time.localtime(time.time()).tm_min == 0) and (time.localtime(time.time()).tm_sec == 0) ):
            #append to dictionary
            date_time_split = current_time.split()
            count_dict['Total Persons'].append(person_total_count_hour)
            count_dict['Total Vehicles'].append(vehicle_total_count_hour)
            count_dict['Day'].append(date_time_split[0])
            count_dict['Date'].append(date_time_split[2] + " " + date_time_split[1] + " " + date_time_split[4])
            count_dict['Time'].append(date_time_split[3])

            #form dataframe
            df = pd.DataFrame(count_dict)

            #save dataframe to CSV file
            df.to_csv('Person and Vehicle Count (Per Hour).csv')
            print('Saved Count Data for ' + current_time)

            #reset total person and vehicle count
            prev_hours_person_count = person_total_count
            prev_hours_vehicle_count = vehicle_total_count
        
        #display video stream
        cv2.imshow('Video', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'): #press Q to quit
            break

    #clear stream capture
    vid.release()
    cv2.destroyAllWindows()

In [9]:
video_path = './data/video/Megaworld CCTV/5_6337009788940977940.avi'
detect(source=video_path, flip=True, use_popup=False)

  fps = 1000 / processing_time
