## Security Camera - Using YOLO to Detect and Track Objects

### Import libraries and define application variables

In [1]:
import cv2
import random
import numpy as np
import cvlib as cv

from collections import deque
from timeit import default_timer as timer
from cvlib.object_detection import draw_bbox

# Open the capture device with index 1
camera = cv2.VideoCapture(1)

# Down-scaling factor applied to frames before detection and tracking
SCALE = 4

# Interval intended for re-checking the tracking status
CHECKER = 10

# Bounded history (at most 50 entries) of detected-object center points
points = deque(maxlen=50)

# Height and width, in pixels, of the main frame
HEIGH = 400
WIDTH = 800

# Contour-area bounds a region must fall between to count as an object
MIN_OBJECT_AREA = 1000
MAX_OBJECT_AREA = 10000

# Thresholds applied when building the masks
DIFF_TRESH_HOLD = 10   # Should be low
MASK_TRESH_HOLD = 100  # Should be high

# MOG2 background subtractor used to build the foreground mask
foregroundModel = cv2.createBackgroundSubtractorMOG2()

# Map each tracker name to its OpenCV (legacy API) factory function.
# 'boosting' must create a Boosting tracker; it previously pointed at the
# MOSSE factory, so asking for 'boosting' silently produced a MOSSE tracker.
tracker_dict = {
    'csrt': cv2.legacy.TrackerCSRT_create,
    'kcf': cv2.legacy.TrackerKCF_create,
    'boosting': cv2.legacy.TrackerBoosting_create,
    'mil': cv2.legacy.TrackerMIL_create,
    'tld': cv2.legacy.TrackerTLD_create,
    'medianflow': cv2.legacy.TrackerMedianFlow_create,
    'mosse': cv2.legacy.TrackerMOSSE_create,
}

# Previous down-scaled frame; starts as an all-black image
last_frame = np.zeros((int(HEIGH/SCALE), int(WIDTH/SCALE), 3), np.uint8)

# True while objects are being tracked, False while detecting
tracking_on = False

# (label, color) pairs of the detected labels
colors_labels = []

# Start time of the current FPS measuring window
fps_start = timer()

# Current tracking state. Spelled "End tracking" (lower-case "t") so it
# matches the value produced and compared by the rest of the file.
tracking_status = "End tracking"

# Counts how many times detection was attempted
counter_frames_predictions = 0

# Counts how many frames were spent tracking
frames_tracking_counter = 0

# Number of objects returned by the last detection
object_detected_counter = 0

# Counts how many frames were read from the camera
frames_reading_counter = 0

# Object counts of the current frame and of the last detection pass
num_of_detections = 0
last_detections = 0

# Frames counted in the current FPS window and the last measured FPS
counter_fps = 0
FPS = 0

### Define all application functions

In [2]:
def expands_dimensions(frame):
    """Replicate a single-channel image into a 3-channel uint8 image.

    Args:
        frame: 2-D array indexed as (rows, cols).

    Returns:
        A uint8 array of shape (rows, cols, 3) whose three channels each
        hold a copy of ``frame`` (values are cast to uint8 on assignment).
    """
    rows, cols = frame.shape[0], frame.shape[1]
    stacked = np.empty((rows, cols, 3), np.uint8)

    # Every channel is fully overwritten, so no zero-fill is needed
    for channel in range(3):
        stacked[:, :, channel] = frame

    return stacked

def gray_frame(frame_rgb):
    """Return a grayscale version of ``frame_rgb`` that keeps its shape.

    Args:
        frame_rgb: color image, converted with ``cv2.COLOR_RGB2GRAY``.

    Returns:
        A uint8 array with the shape of ``frame_rgb`` whose first three
        channels each hold the gray values.
    """
    luminance = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2GRAY)

    replicated = np.zeros(frame_rgb.shape, np.uint8)
    for channel in range(3):
        replicated[:, :, channel] = luminance

    return replicated

def keepLargeComponents(image, treshold):
    """Keep only the labelled components that cover more than ``treshold`` pixels.

    Args:
        image: array of component labels; label 0 is treated as background.
        treshold: a component is kept only when its pixel count is strictly
            greater than this value.

    Returns:
        A float32 array with the shape of ``image`` holding 255 on pixels of
        the kept components and 0 everywhere else.
    """
    # Count every label in one pass instead of rescanning the image per label
    labels, counts = np.unique(image, return_counts=True)

    # Labels that are not background and are large enough
    large_labels = labels[(labels != 0) & (counts > treshold)]

    keep = np.isin(image, large_labels)

    return np.float32(255 * keep)

def mask(frame_rgb, background, diff_threshold=9):
    """Build a 3-channel change mask between a frame and a background image.

    Both images are converted to gray with the SAME conversion code. Using
    RGB2GRAY for one and BGR2GRAY for the other weights the first and last
    channels differently, so an unchanged colored pixel would produce a
    non-zero difference and show up as a false change.

    Args:
        frame_rgb: current color frame.
        background: reference color frame, in the same channel order and
            size as ``frame_rgb``.
        diff_threshold: blurred gray differences above this value are marked
            as changed. Defaults to 9.

    Returns:
        A uint8 array (rows, cols, 3) holding 255 on changed pixels (after
        dilation) and 0 elsewhere.
    """
    # Gray + Gaussian blur of the current frame
    frame_gray = cv2.cvtColor(frame_rgb, cv2.COLOR_BGR2GRAY)
    frame_gray = cv2.GaussianBlur(frame_gray, (25, 25), 0)

    # Gray + Gaussian blur of the background, using the same conversion
    background_gray = cv2.cvtColor(background, cv2.COLOR_BGR2GRAY)
    background_gray = cv2.GaussianBlur(background_gray, (25, 25), 0)

    # Per-pixel absolute difference between the two blurred images
    abs_diff = cv2.absdiff(frame_gray, background_gray)

    # Differences above the threshold become 255, everything else 0
    _, binary_mask = cv2.threshold(abs_diff, diff_threshold, 255, cv2.THRESH_BINARY)

    # Grow the changed regions
    dilated_mask = cv2.dilate(binary_mask, None, iterations=5)

    # Expand the mask to 3 channels
    return expands_dimensions(dilated_mask)

def mask_tracking(frame_RGB, last_frame):
    """Build a 3-channel mask of the pixels that changed since the last frame.

    Both frames are converted to gray with the SAME conversion code. Mixing
    RGB2GRAY and BGR2GRAY weights the first and last channels differently,
    so an unchanged colored pixel would be reported as a change.

    Args:
        frame_RGB: current color frame.
        last_frame: previous color frame, in the same channel order and size
            as ``frame_RGB``.

    Returns:
        A uint8 array (rows, cols, 3) holding 255 where the gray difference
        exceeds ``DIFF_TRESH_HOLD`` and 0 elsewhere.
    """
    frame_gray = cv2.cvtColor(frame_RGB, cv2.COLOR_BGR2GRAY)
    last_gray = cv2.cvtColor(last_frame, cv2.COLOR_BGR2GRAY)

    # Per-pixel absolute difference between the two gray frames
    abs_diff = cv2.absdiff(frame_gray, last_gray)

    # Differences above DIFF_TRESH_HOLD become 255, everything else 0
    _, abs_diff_mask = cv2.threshold(abs_diff, DIFF_TRESH_HOLD, 255, cv2.THRESH_BINARY)

    # Expand the mask to 3 channels
    return expands_dimensions(abs_diff_mask)

def cv_mask(frame_rgb, last_frame):
    """Build a 3-channel foreground mask from the background subtractor.

    Args:
        frame_rgb: frame fed to the module-level ``foregroundModel``.
        last_frame: accepted for signature compatibility; not used here.

    Returns:
        A uint8 array (rows, cols, 3) holding 255 on foreground components
        larger than 10 pixels and 0 elsewhere.
    """
    # Update the background model and get the raw foreground mask
    raw_mask = foregroundModel.apply(frame_rgb)

    # Morphological opening with a 3x3 rectangle to reduce noise
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    opened = cv2.morphologyEx(np.float32(raw_mask), cv2.MORPH_OPEN, kernel)

    # Label the connected regions of the non-zero pixels
    non_zero = np.array(0 < opened, np.uint8)
    _, labelled = cv2.connectedComponents(non_zero)

    # Drop the small components
    kept = keepLargeComponents(labelled, treshold=10)

    # Bring the result back to a uint8 binary image
    kept = np.clip(kept, 0, 255).astype(np.uint8)
    _, kept = cv2.threshold(kept, 5, 255, cv2.THRESH_BINARY)

    # Add a channel axis and replicate it three times
    return np.repeat(kept[:, :, np.newaxis], 3, axis=2)
    
def preproccess_frames(frame, last_frame):
    """Resize the captured frame and derive the detection and tracking masks.

    Args:
        frame: frame as read from the camera.
        last_frame: previous down-scaled frame.

    Returns:
        Tuple ``(frame, frame_rgb, tracking_mask, frame_mask, last_frame)``:
        the frame resized to (WIDTH, HEIGH), its copy down-scaled by SCALE,
        the mask from ``mask_tracking``, the mask from ``cv_mask`` and a copy
        of the down-scaled frame to be used as the next ``last_frame``.
    """
    # cv2.resize takes the target size as (width, height)
    small_size = (int(WIDTH/SCALE), int(HEIGH/SCALE))

    frame = cv2.resize(frame, (WIDTH, HEIGH))
    frame_rgb = cv2.resize(frame, small_size)

    # Mask used for detection, then mask used for tracking
    frame_mask = cv_mask(frame_rgb, last_frame)
    tracking_mask = mask_tracking(frame_rgb, last_frame)

    return frame, frame_rgb, tracking_mask, frame_mask, frame_rgb.copy()

def reader_manger(FPS, fps_start, counter_fps, tracking_on):
    """Update the FPS bookkeeping and handle the keyboard.

    Args:
        FPS: last measured frames-per-second value.
        fps_start: start time of the current measuring window.
        counter_fps: frames counted in the current window.
        tracking_on: current tracking flag.

    Returns:
        Tuple ``(FPS, fps_start, counter_fps, tracking_on, quit)`` where
        ``quit`` is True when 'q' was pressed.
    """
    now = timer()

    # Once at least a second has passed, publish the count and open a new window
    if now - fps_start >= 1.0:
        FPS, counter_fps = counter_fps, 0
        fps_start = timer()

    # Poll the keyboard for 1 ms
    pressed = cv2.waitKey(1) % 256

    # 'n' switches tracking off
    if pressed == ord('n'):
        tracking_on = False

    # 'q' asks the caller to stop reading frames
    stop_requested = pressed == ord('q')

    return FPS, fps_start, counter_fps, tracking_on, stop_requested

def display_windows(frame, tracking_mask, frame_mask, show_masks=False):
    """Show the main frame, optionally next to the detection mask.

    The mask was previously resized and stacked on every call although the
    result was never displayed; that work now happens only when requested.

    Args:
        frame: main frame of shape (HEIGH, WIDTH, 3).
        tracking_mask: accepted for signature compatibility; not displayed.
        frame_mask: 3-channel detection mask.
        show_masks: when True, ``frame_mask`` is resized to (WIDTH, HEIGH)
            and shown to the right of ``frame``. Defaults to False, which
            shows ``frame`` alone.
    """
    window = frame

    if show_masks:
        resized_mask = cv2.resize(frame_mask, (WIDTH, HEIGH))
        window = np.hstack((frame, resized_mask))

    cv2.imshow("Main_Window", window)

### Define all the detection functions

In [3]:
def detection_manager(frame, frame_rgb):
    """Detect objects on the down-scaled frame and scale the boxes back up.

    Args:
        frame: main frame; accepted for signature compatibility, not used.
        frame_rgb: down-scaled frame passed to ``cv.detect_common_objects``.

    Returns:
        Tuple ``(boxes, detected_labels, scores, num_of_objects,
        tracking_status)``. ``boxes`` is a list of arrays
        ``[x_min, y_min, x_max, y_max]`` multiplied by SCALE;
        ``tracking_status`` is "Start tracking" when at least one object was
        detected and "End tracking" otherwise.
    """
    bounding_boxes, detected_labels, scores = cv.detect_common_objects(frame_rgb)

    boxes = []
    for box in bounding_boxes:
        # Boxes hold corner coordinates (x1, y1, x2, y2), not width/height
        x_min, y_min, x_max, y_max = (int(value) for value in box)
        boxes.append(np.array([x_min*SCALE, y_min*SCALE, x_max*SCALE, y_max*SCALE]))

    # With detections we start tracking, otherwise detection is retried
    tracking_status = "Start tracking" if boxes else "End tracking"

    return boxes, detected_labels, scores, len(boxes), tracking_status

def set_labels_colors(detected_labels):
    """Pair every detected label with a random color, one color per label.

    A label that appears several times always gets the same color; before,
    each occurrence drew a new random color, so one label could carry
    several conflicting colors.

    Args:
        detected_labels: sequence of label strings, one per detection.

    Returns:
        A list of ``(label, color)`` tuples in the order of
        ``detected_labels``; ``color`` is a ``(blue, green, red)`` tuple of
        ints in 0..255.
    """
    color_by_label = {}
    colors_labels = []

    for label in detected_labels:
        if label not in color_by_label:
            red = random.randint(0, 255)
            blue = random.randint(0, 255)
            green = random.randint(0, 255)
            color_by_label[label] = (blue, green, red)

        colors_labels.append((label, color_by_label[label]))

    return colors_labels

def detected_object(frame_mask):
    """Count the object-sized regions of a mask and draw a box around each.

    Args:
        frame_mask: 3-channel mask image; the rectangles are drawn onto it.

    Returns:
        Tuple ``(num_of_detected, frame_mask)``: the number of external
        contours whose area lies strictly between MIN_OBJECT_AREA and
        MAX_OBJECT_AREA, and the mask with a green rectangle around each.
    """
    # cvtColor returns a new image, so the input needs no copy beforehand
    gray_mask = cv2.cvtColor(frame_mask, cv2.COLOR_RGB2GRAY)

    contours, _ = cv2.findContours(gray_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    num_of_detected = 0

    # Largest contours first, so the scan can stop at the first small one
    for contour in sorted(contours, key=cv2.contourArea, reverse=True):

        contour_area = cv2.contourArea(contour)

        # Too large: skip it, smaller contours may still qualify
        if contour_area >= MAX_OBJECT_AREA:
            continue

        # Too small: every remaining contour is at most this large
        if contour_area <= MIN_OBJECT_AREA:
            break

        num_of_detected += 1

        # Upright bounding rectangle of the contour
        (x_min, y_min, box_width, box_height) = cv2.boundingRect(contour)

        frame_mask = cv2.rectangle(frame_mask, (x_min, y_min),
                                   (x_min + box_width, y_min + box_height), (0, 255, 0), 2)

    return num_of_detected, frame_mask

### Define all the tracking functions

In [4]:
def start_tracking(frame, frame_rgb, bounding_boxes):
    """Create a multi-tracker with one CSRT tracker per bounding box.

    Args:
        frame: main frame; accepted for signature compatibility, not used.
        frame_rgb: down-scaled frame the trackers are initialised on.
        bounding_boxes: boxes ``(x_min, y_min, x_max, y_max)`` in main-frame
            coordinates.

    Returns:
        The ``cv2.legacy`` multi-tracker holding the added trackers.
    """
    multi_tracker = cv2.legacy.MultiTracker_create()

    for box in bounding_boxes:
        # Bring the corners down to the scale of frame_rgb
        left, top, right, bottom = (int(int(value)/SCALE) for value in box)

        # The tracker expects (x, y, width, height), not two corners
        region = np.array([left, top, right - left, bottom - top])

        multi_tracker.add(tracker_dict['csrt'](), frame_rgb, region)

    return multi_tracker

def tracking_manager(frame, frame_rgb, bounding_boxes, tracking_status, frames_tracking_counter):
    """Create the trackers when tracking starts; otherwise return an empty set.

    Every path returns ``(frame, tracking_status, trackers)`` in that order.
    Before, the fall-through path returned the last two values swapped, and
    the "no tracking" guard compared against 'End Tracking' although the
    detection step produces "End tracking", so the swapped return was the
    one actually taken.

    Args:
        frame: main frame, returned unchanged.
        frame_rgb: down-scaled frame the trackers are initialised on.
        bounding_boxes: detected boxes in main-frame coordinates.
        tracking_status: status produced by the detection step.
        frames_tracking_counter: accepted for signature compatibility; not
            used here.

    Returns:
        Tuple ``(frame, tracking_status, trackers)``. ``trackers`` holds one
        tracker per box when ``tracking_status`` is "Start tracking" and is
        an empty multi-tracker for any other status.
    """
    if tracking_status == 'Start tracking':
        trackers = start_tracking(frame, frame_rgb, bounding_boxes)
    else:
        # Nothing to track: hand back an empty multi-tracker
        trackers = cv2.legacy.MultiTracker_create()

    return frame, tracking_status, trackers

def keep_tracking_manager(frames_variables, tracking_variabels):
    """Advance the trackers by one frame and draw the tracked objects.

    Tracking is ended (status "End tracking", flag False) when the tracker
    update fails or on every 25th tracked frame, so the caller falls back to
    detection. Before, the periodic check compared ``counter % 25`` with -1,
    which can never be true, and a failed update left the state unchanged.

    Args:
        frames_variables: ``[frame, frame_rgb, tracking_mask, frame_mask,
            frames_tracking_counter]``; the two masks are not used here.
        tracking_variabels: ``[trackers, tracking_status, tracking_on,
            detected_labels, colors_labels]``.

    Returns:
        Tuple ``(frame, tracking_status, tracking_on, trackers)``.
    """
    # Number of tracked frames between two forced re-detections
    recheck_interval = 25

    # Color used when a label has no entry in colors_labels
    default_color = (0, 255, 0)

    frame, frame_rgb, _, _, frames_tracking_counter = frames_variables
    trackers, tracking_status, tracking_on, detected_labels, colors_labels = tracking_variabels

    # Boxes come back as (x, y, width, height) at the scale of frame_rgb
    (success, bounding_boxes) = trackers.update(frame_rgb)

    # Lost object or periodic re-check: end tracking
    if not success or frames_tracking_counter % recheck_interval == 0:
        return frame, "End tracking", False, trackers

    # Later entries win, matching a scan that keeps the last match
    color_by_label = {entry[0]: entry[1] for entry in colors_labels}

    for index, box in enumerate(bounding_boxes):

        # Scale the box back to main-frame coordinates
        (x_min, y_min, w, h) = (int(a) * SCALE for a in box)

        # Ignore boxes that leave the main frame
        if not (0 <= x_min and x_min+w <= WIDTH and 0 <= y_min and y_min+h <= HEIGH):
            continue

        tracking_on = True
        tracking_status = "Keep tracking"

        # Boxes are matched to labels by position
        label = detected_labels[index] if index < len(detected_labels) else ""
        color = color_by_label.get(label, default_color)

        # Draw the box and its label on the main frame
        cv2.rectangle(frame, (x_min, y_min), (x_min+w, y_min+h), color, 2)
        cv2.putText(frame, label, (x_min-5, y_min-5), cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 2)

    return frame, tracking_status, tracking_on, trackers

### Reading frames from camera

In [5]:
# Frame rate reported by the capture device; it does not change per frame,
# so it is queried once
camera_fps = int(camera.get(cv2.CAP_PROP_FPS))

# Read and process frames until the stream ends or 'q' is pressed.
# try/finally guarantees that the camera and the windows are released even
# when a processing step raises.
try:
    while True:

        # Count the frame for the FPS measurement and for the total
        counter_fps += 1
        frames_reading_counter += 1

        # Capture the next frame; stop when none could be retrieved
        ret, frame = camera.read()
        if not ret or frame is None:
            break

        # Main frame, down-scaled frame and the two masks
        frame, frame_rgb, tracking_mask, frame_mask, last_frame = preproccess_frames(frame, last_frame)

        # Count the object-sized regions of the current mask
        num_of_detections, frame_mask = detected_object(frame_mask)

        if 0 < np.sum(frame_mask):

            # More objects than at the last detection: go back to detection.
            # This must be an assignment; a comparison here had no effect.
            if last_detections < num_of_detections:
                tracking_on = False
                tracking_status = "End tracking"

            if tracking_on:

                # Write both counters on the mask frame
                cv2.putText(frame_mask, "Last Detection: " + str(last_detections), (10, 15),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
                cv2.putText(frame_mask, "Current Detection: " + str(num_of_detections), (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

                frames_tracking_counter += 1

                # Pack the arguments of keep_tracking_manager
                frames_variables = [frame, frame_rgb, tracking_mask, frame_mask, frames_tracking_counter]
                tracking_variabels = [trackers, tracking_status, tracking_on, detected_labels, colors_labels]

                frame, tracking_status, tracking_on, trackers = keep_tracking_manager(frames_variables, tracking_variabels)

                # Decide the mode of the next iteration
                if tracking_status == "End tracking":
                    tracking_on = False
                if tracking_status == "Keep tracking":
                    tracking_on = True

            else:

                counter_frames_predictions += 1

                # Remember the count of this frame. Recounting on frame_mask
                # would run on a mask that already has rectangles drawn on
                # it, which can change the contours.
                last_detections = num_of_detections

                # Detect the objects and get their boxes in main-frame scale
                bounding_boxes, detected_labels, scores, object_detected_counter, tracking_status = detection_manager(frame, frame_rgb)

                # Pair the labels with colors
                colors_labels = set_labels_colors(detected_labels)

                # Draw the detected boxes on the main frame
                frame = draw_bbox(frame, bounding_boxes, detected_labels, scores)

                # Create the trackers for the detected objects
                frame, tracking_status, trackers = tracking_manager(frame, frame_rgb, bounding_boxes, tracking_status, frames_tracking_counter)

                # Decide the mode of the next iteration
                if tracking_status == "Start tracking":
                    tracking_on = True
                elif tracking_status == "End tracking":
                    tracking_on = False

        # Update the FPS bookkeeping and read the keyboard
        FPS, fps_start, counter_fps, tracking_on, stop_requested = reader_manger(FPS, fps_start, counter_fps, tracking_on)

        # Show the current frame
        display_windows(frame, tracking_mask, frame_mask)

        if stop_requested:
            break

finally:
    # Release the camera and close every OpenCV window
    camera.release()
    cv2.destroyAllWindows()