## **CNN in Real Time using Transfer Learning with ResNet50 on CIFAR10 Dataset** 


This app recognizes movement, **detects objects**, and **tracks** them using a **real-time camera.**  
We use a **ResNet50** model with **Transfer Learning** on the **CIFAR10** dataset; the trained model reaches **95% accuracy.**  
After movement is recognized, we detect the object with our model and then start tracking it.  
We store all detection events with their date and time and show them on the screen.  
The application is written in **Python** using **TensorFlow** with **Keras**.

### Import Libraries, Loading Model and Set Camera  ###

In [1]:
import os
import io
import cv2
import h5py
import winsound
import numpy as np
import matplotlib.pyplot as plt

from time import strftime
from collections import deque
from timeit import default_timer as timer
from tensorflow.keras.models import load_model

# Class names of the CIFAR-10 dataset; a prediction is mapped to a name by
# the index of its highest score
labels = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Load the model that was trained for 25 epochs on the CIFAR dataset
model = load_model(r'C:\file location..')

# Print the summary of the loaded model
model.summary()

# Open the video source the frames are read from
camera = cv2.VideoCapture(r'C:\file location..')

# Location of the alert sound file
soundPath = r'C:\file location..'

# Location where the images of detection events are saved
outPutPath = r'C:\file location..'

# Location of the text file that stores the detection events
txtPath = r'C:\file location..'

# Append the headline, followed by one empty line, to the events file
with open(txtPath, 'a') as events_file:
    events_file.write("###################### Detection Events #########################")
    events_file.write('\n' * 2)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model (Functional)          (None, 7, 7, 2048)        23587712  
                                                                 
 flatten_1 (Flatten)         (None, 100352)            0         
                                                                 
 dense_3 (Dense)             (None, 256)               25690368  
                                                                 
 batch_normalization_2 (Batc  (None, 256)              1024      
 hNormalization)                                                 
                                                                 
 dropout_2 (Dropout)         (None, 256)               0         
                                                                 
 dense_4 (Dense)             (None, 256)               65792     
                                                      

### Define The Application Constant Variables ###

In [2]:
# Scale to decrease the frame size
SCALE = 1

# Height and width of the frames shown in the main window
HEIGH, WIDTH = 350, 500

# Height and width of the prediction model input
hight, width = 224, 224

# Contour-area boundaries of an object that is sent to the model
MIN_OBJECT_AREA = 1000
MAX_OBJECT_AREA = 10000

# Thresholds of the masks
DIFF_TRESH_HOLD = 20  # Should be low
MASK_TRESH_HOLD = 100 # Should be high

# First row / column and row spacing used when putting text on a frame
FIRST_ROW = int(HEIGH/10)
FIRST_COL = int(WIDTH/10)
ROWS_SPACE = int(HEIGH/10)

# Font size as a fraction of the frame height
FONT_SIZE = ((HEIGH/1500))

# Background subtractor applied to every frame to obtain the foreground mask
foregroundModel = cv2.createBackgroundSubtractorMOG2()

# Black placeholder frames in the scaled working size
last_frame = np.zeros((int(HEIGH/SCALE), int(WIDTH/SCALE), 3), np.uint8)
scores_frame = np.zeros((int(HEIGH/SCALE), int(WIDTH/SCALE), 3), np.uint8)

# Queue with the latest center points of the tracked object
points = deque(maxlen=50)

# Tracking is re-validated every `tracking_check` frames
tracking_check = 10

# Tracker factories by name. Every value is a callable that builds a new
# tracker, so `tracker_dict[name]()` works for every key (storing an already
# built instance would make that call fail), and 'boosting' maps to the
# Boosting tracker rather than MOSSE.
tracker_dict = {
    'csrt': cv2.TrackerCSRT_create,
    'kcf': cv2.TrackerKCF_create,
    'boosting': cv2.legacy.TrackerBoosting_create,
    'mil': cv2.TrackerMIL_create,
    'tld': cv2.legacy.TrackerTLD_create,
    'medianflow': cv2.legacy.TrackerMedianFlow_create,
    'mosse': cv2.legacy.TrackerMOSSE_create,
}

# Initialize the tracker that follows the detected object
tracker = tracker_dict['csrt']()

### Define Application Variables ###

In [3]:
# Read the first frame from the video source
ret, frm = camera.read()

# Start with three independent black frames for the main window
cut_fragment_rgb, tracking_frame, inf_frame = (
    np.zeros((HEIGH, WIDTH, 3), np.uint8) for _ in range(3)
)

# Counters, all starting at zero
counter_images_processing = counter_frames_processing = 0
counter_frames_prediction = counter_fail_predictions = 0
counter_birds_prediction = counter_frames_tracking = 0
counter_frames_reading = 0

# System status: currently tracking an object or not
tracking_on = False

# Start the timer used for the FPS measurement
fps_start = timer()

# FPS bookkeeping and the time of the latest detection
detection_time = counter_fps = FPS = 0

# All detection events collected so far, and the latest label
log = []
label = ""

### Define The General Application Functions ###

In [4]:
def expands_dimensions(frame):
    """Return a 3-channel uint8 image whose channels are copies of `frame`.

    The result has shape (frame.shape[0], frame.shape[1], 3); the values of
    `frame` are written into each of the three channels.
    """
    rows, cols = frame.shape[0], frame.shape[1]
    stacked = np.zeros((rows, cols, 3), np.uint8)

    # Write the same plane into every channel
    for channel in range(3):
        stacked[:, :, channel] = frame

    return stacked

def gray_frame(frame_rgb):
    """Return a gray version of `frame_rgb` that keeps the input's shape.

    The frame is converted with cv2.COLOR_RGB2GRAY and the gray plane is
    written into channels 0, 1 and 2 of a new uint8 array.
    """
    # Single-channel gray conversion done by OpenCV
    luminance = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2GRAY)

    frame_gray = np.zeros(frame_rgb.shape, np.uint8)
    for channel in range(3):
        frame_gray[:, :, channel] = luminance

    return frame_gray

def empty_frame(frame_bgr):
    """Return a uint8 frame shaped like `frame_bgr` with channels 0-2 set to 255.

    Used to initialize a white frame for the main window.
    """
    blank = np.zeros(frame_bgr.shape, np.uint8)

    # Paint the first three channels white, one channel at a time
    for channel in (0, 1, 2):
        blank[:, :, channel] = 255

    return blank

def detection_timer(detection_time, last_time):
    """Return how many whole minutes passed between two detections.

    Both arguments are strings that END with a timestamp, either
    "dd/mm/YYYY HH:MM:SS" or "HH:MM:SS"; any text before the timestamp
    (for example the "Detected ... at: " prefix of a log entry) is ignored.

    Args:
        detection_time: timestamp string of the current detection.
        last_time: string ending with the previous detection's timestamp;
            when it is empty there is no previous detection.

    Returns:
        The absolute difference in whole minutes, or 0 when `last_time`
        is empty.

    Raises:
        ValueError: if the two strings do not end with a timestamp in the
            same supported format.
    """
    from datetime import datetime

    # No previous detection to compare with
    if len(last_time) == 0:
        return 0

    # (format, number of trailing characters the timestamp occupies)
    supported_formats = (("%d/%m/%Y %H:%M:%S", 19), ("%H:%M:%S", 8))

    for time_format, length in supported_formats:
        try:
            current = datetime.strptime(detection_time[-length:], time_format)
            previous = datetime.strptime(last_time[-length:], time_format)
        except ValueError:
            # Not this format; try the next one
            continue

        # Compare the complete timestamps, not only their last two digits
        elapsed_seconds = abs((current - previous).total_seconds())
        return int(elapsed_seconds // 60)

    raise ValueError("detection times must end with 'dd/mm/YYYY HH:MM:SS' or 'HH:MM:SS'")

def preproccess_frames(frame, last_frame):
    """Build the display frame, the working frame and both masks for one read.

    Returns the tuple (frame, frame_rgb, tracking_mask, frame_mask, last_frame)
    where `frame` is resized to (WIDTH, HEIGH), `frame_rgb` is that frame
    resized by SCALE, the masks come from mask_tracking / cv_mask, and the
    returned `last_frame` is a copy of `frame_rgb` for the next call.
    """
    display_size = (WIDTH, HEIGH)
    working_size = (int(WIDTH/SCALE), int(HEIGH/SCALE))

    # Frame shown in the main window
    frame = cv2.resize(frame, display_size)

    # Scaled frame the masks are computed on
    frame_rgb = cv2.resize(frame, working_size)

    # Foreground mask and frame-difference mask against the previous frame
    frame_mask = cv_mask(frame_rgb, last_frame)
    tracking_mask = mask_tracking(frame_rgb, last_frame)

    # The current working frame becomes the next call's previous frame
    return frame, frame_rgb, tracking_mask, frame_mask, frame_rgb.copy()

def mask(frame_rgb, background):
    """Return a dilated binary mask of the change between frame and background.

    Both images are converted to gray and blurred with a 25x25 Gaussian
    kernel; pixels whose absolute difference exceeds 20 become 255, and the
    result is dilated 5 times.
    """
    blur_kernel = (25, 25)

    # Gray + blurred version of the current frame
    current = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2GRAY)
    current = cv2.GaussianBlur(current, blur_kernel, 0)

    # Gray + blurred version of the background
    reference = cv2.cvtColor(background, cv2.COLOR_BGR2GRAY)
    reference = cv2.GaussianBlur(reference, blur_kernel, 0)

    # Binary mask of the pixels that changed by more than the threshold
    difference = cv2.absdiff(current, reference)
    _, binary = cv2.threshold(difference, 20, 255, cv2.THRESH_BINARY)

    # Dilate so the object becomes one connected area
    return cv2.dilate(binary, None, iterations=5)

def mask_tracking(frame_RGB, last_frame):
    """Return a 3-channel binary mask of the change since the previous frame.

    Both frames are converted to gray, their absolute difference is
    thresholded with DIFF_TRESH_HOLD, and the mask is expanded to 3 channels.
    """
    current_gray = cv2.cvtColor(frame_RGB, cv2.COLOR_RGB2GRAY)
    previous_gray = cv2.cvtColor(last_frame, cv2.COLOR_BGR2GRAY)

    # Pixels that differ by more than the threshold become 255
    difference = cv2.absdiff(current_gray, previous_gray)
    _, binary = cv2.threshold(difference, DIFF_TRESH_HOLD, 255, cv2.THRESH_BINARY)

    return expands_dimensions(binary)

def cv_mask(frame_rgb, last_frame):
    """Return the foreground mask of `frame_rgb` with a trailing channel axis.

    The frame is fed to the module-level background subtractor, the result is
    opened with a 3x3 rectangle to reduce noise, connected components are
    labelled and passed through keepLargeComponents (threshold 0), and the
    mask is returned as a binary uint8 array with one channel.
    `last_frame` is accepted but not used.
    """
    # Raw foreground estimate from the background subtractor
    raw_mask = foregroundModel.apply(frame_rgb)

    # Morphological opening to reduce noise
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    opened = cv2.morphologyEx(np.float32(raw_mask), cv2.MORPH_OPEN, kernel)

    # Label the connected components of every non-zero pixel
    binary = np.array(0 < opened, np.uint8)
    _, image_labels = cv2.connectedComponents(binary)

    # Keep the components larger than the threshold
    kept = keepLargeComponents(image_labels, treshold=0)

    # Back to a binary uint8 mask
    kept = np.clip(kept, 0, 255).astype(np.uint8)
    _, kept = cv2.threshold(kept, 0, 255, cv2.THRESH_BINARY)

    # Add a channel axis at the end
    return kept[:, :, np.newaxis]

def keepLargeComponents(image, treshold):
    """Return a mask of the labelled components that are larger than `treshold`.

    Args:
        image: array of component labels; label 0 is the background.
        treshold: a component is kept only when its pixel count is strictly
            greater than this value.

    Returns:
        A float32 array shaped like `image`, 255 on the kept components and
        0 elsewhere.
    """
    # Count the pixels of every label in one pass instead of scanning the
    # whole image once per label
    label_values, pixel_counts = np.unique(image, return_counts=True)

    # Drop the background and the components that are too small
    kept_labels = label_values[(label_values != 0) & (pixel_counts > treshold)]

    return np.float32(255 * np.isin(image, kept_labels))

def bar_chart(obtained_scores, classes_names):
    """Render the scores as a bar chart and return it as an image array.

    The highest bar is colored red. The chart is saved as PNG into an
    in-memory buffer, decoded with OpenCV and resized to (WIDTH, HEIGH).
    """
    positions = np.arange(obtained_scores.size)

    # Draw the bars and highlight the highest one
    bars = plt.bar(positions, obtained_scores, align='center', alpha=0.6)
    bars[np.argmax(obtained_scores)].set_color('red')

    # Tick labels, axis names and title
    plt.xticks(positions, classes_names, rotation=25, fontsize=10)
    plt.xlabel('Class', fontsize=20)
    plt.ylabel('Value', fontsize=20)
    plt.title('Obtained Scores', fontsize=20)

    # Adjust the borders of the plot
    plt.tight_layout(pad=2.5)

    # Save the chart into a buffer, close the plot and grab the PNG bytes
    with io.BytesIO() as png_buffer:
        plt.savefig(png_buffer, format='png', dpi=200)
        plt.close()
        encoded = np.frombuffer(png_buffer.getvalue(), dtype=np.uint8)

    # Decode the PNG and fit it to the main-window frame size
    chart = cv2.imdecode(encoded, 1)
    return cv2.resize(chart, (WIDTH, HEIGH))

def collaction_frames(left_frame, mid_frame, right_frame):
    """Stack three frames horizontally into one frame.

    A frame with fewer than 3 dimensions gets a trailing axis, and a frame
    whose third dimension is not 3 is repeated 3 times along that axis
    before stacking.
    """
    def as_three_channels(image):
        # Add the missing channel axis
        if len(image.shape) < 3:
            image = image[:, :, np.newaxis]

        # Repeat the channels when there are not exactly three
        if image.shape[2] != 3:
            image = np.repeat(image, 3, axis=2)

        return image

    prepared = [as_three_channels(image)
                for image in (left_frame, mid_frame, right_frame)]

    # One row: left, middle, right
    return np.hstack(prepared)

def build_main_window(frame, inf_frame, tracking_frame, cut_fragment_rgb, scores_frame, frame_mask):
    """Combine six frames into the main window, three per row.

    Upper row: frame, inf_frame, tracking_frame.
    Lower row: cut_fragment_rgb, scores_frame, frame_mask.
    """
    rows = (
        collaction_frames(frame, inf_frame, tracking_frame),
        collaction_frames(cut_fragment_rgb, scores_frame, frame_mask),
    )

    # Upper row on top of the lower row
    return np.vstack(rows)

def display_windows(frame, tracking_mask, frame_mask):
    """Show the frame and both masks side by side in the "Main_Window" window.

    Both masks are resized to (WIDTH, HEIGH); the frame mask is additionally
    expanded to 3 channels before stacking.
    """
    target_size = (WIDTH, HEIGH)

    tracking_mask = cv2.resize(tracking_mask, target_size)
    frame_mask = expands_dimensions(cv2.resize(frame_mask, target_size))

    # One row: frame, tracking mask, frame mask
    combined = np.hstack((frame, tracking_mask, frame_mask))
    cv2.imshow("Main_Window", combined)
    
def reader_manger(FPS, fps_start, counter_fps, tracking_on):
    """Update the FPS bookkeeping and handle the keyboard for one frame.

    Once at least one second has passed since `fps_start`, FPS takes the value
    of `counter_fps`, the counter is reset and the timer restarted.
    Pressing 'n' switches tracking off; pressing 'q' asks to quit.

    Returns (FPS, fps_start, counter_fps, tracking_on, quit).
    """
    # Publish the FPS once a second and start a new measuring window
    if 1.0 <= timer() - fps_start:
        FPS, counter_fps = counter_fps, 0
        fps_start = timer()

    # Wait up to 10 ms for a key press
    key = cv2.waitKey(10) % 256

    # 'n' stops the current tracking
    if key == ord('n'):
        tracking_on = False

    # 'q' quits the app
    should_quit = key == ord('q')

    return FPS, fps_start, counter_fps, tracking_on, should_quit

### Define the Tracking and Detection Objects functions ###

In [5]:
def detection_manager(trackers, counters, frames):
    """Look for a bird among the contours of the mask and start tracking it.

    Args:
        trackers: sequence whose first element is the tracker.
        counters: [counter_frames_prediction, counter_birds_prediction].
        frames: [frame, frame_mask, cut_fragment_rgb, scores_frame].

    Returns:
        (frames, detected, counters) where `frames` and `counters` have the
        same layout as the inputs and `detected` is
        [label, scores, tracker, tracking_on, prediction_timer, detection_time].
    """
    # Defaults used when nothing is predicted
    label, scores = "", []
    detection_time = prediction_timer = 0
    tracking_on = False

    # Unpack the function variables
    tracker = trackers[0]
    frame, frame_mask, cut_fragment_rgb, scores_frame = frames
    counter_frames_prediction, counter_birds_prediction = counters

    # All external contours of the mask, largest area first
    contours, _ = cv2.findContours(frame_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    for contour in contours:
        contour_area = cv2.contourArea(contour)

        # Too large: skip it and look at the next, smaller contour
        if not contour_area < MAX_OBJECT_AREA:
            continue

        # Too small: the list is sorted, so all remaining contours are too
        if not MIN_OBJECT_AREA < contour_area:
            break

        counter_frames_prediction += 1

        # Cut the fragment around the contour and classify it
        cut_fragment_rgb, rectangle = cut_fragment(frame, contour)
        scores, label, prediction_timer, detection_time = prediction_manager(cut_fragment_rgb)

        if label != "bird":
            # Local bookkeeping only; this value is not returned
            counter_fail_predictions = counter_frames_prediction - counter_birds_prediction
            continue

        counter_birds_prediction += 1

        # Hand the bird to the tracker, save the event and draw the rectangle
        scores_frame, tracker = detected_bird(frame, tracker, rectangle, detection_time, label, scores)

        # Tracking starts with the first detected bird
        tracking_on = True
        break

    # Pack the results back into lists
    frames = [frame, frame_mask, cut_fragment_rgb, scores_frame]
    detected = [label, scores, tracker, tracking_on, prediction_timer, detection_time]
    counters = [counter_frames_prediction, counter_birds_prediction]

    return frames, detected, counters

def cut_fragment(frame, contour):
    """Cut the bounding box of `contour` out of `frame`.

    The box from cv2.boundingRect is multiplied by SCALE, the fragment is
    resized to (WIDTH, HEIGH), and the box is returned next to it as a numpy
    array [x_min, y_min, box_width, box_height].
    """
    # Bounding box of the contour, scaled to the coordinates of `frame`
    x_min, y_min, box_width, box_height = (
        value * SCALE for value in cv2.boundingRect(contour)
    )

    # Crop the fragment and fit it to the main-window frame size
    fragment = frame[y_min: y_min + box_height, x_min: x_min + box_width]
    fragment = cv2.resize(fragment, (WIDTH, HEIGH))

    rectangle = np.array([x_min, y_min, box_width, box_height])

    return fragment, rectangle

def prediction_manager(cut_fragment_rgb):
    """Classify the fragment and return the prediction results.

    Returns (scores, label, prediction_timer, detection_time): the model
    scores, the label with the highest score, the seconds the prediction
    took, and the current time formatted as "dd/mm/YYYY HH:MM:SS".
    """
    # Measure only the model call
    started = timer()
    scores = prediction_model(cut_fragment_rgb)
    finished = timer()

    # Time stamp of this detection
    detection_time = strftime("%d/%m/%Y %H:%M:%S")

    # The label is the class at the index of the highest score
    best_label = labels[np.argmax(scores)]

    return scores, best_label, finished - started, detection_time

def prediction_model(cut_fragment_rgb):
    """Return the model's scores for the cut fragment.

    A copy of the fragment is resized to the model input size
    (width, hight) and given a leading batch axis before prediction.
    """
    # Work on a copy resized to the model input shape
    resized = cv2.resize(cut_fragment_rgb.copy(), (width, hight), interpolation=cv2.INTER_CUBIC)

    # (height, width, channels) -> (1, height, width, channels)
    batch = resized[np.newaxis, :, :, :]

    return model.predict(batch)

def drawing_rectangle(frame, rectangle, label):
    """Draw a green box with `label` above it on `frame` and return the frame.

    `rectangle` holds (x_min, y_min, box_width, box_height).
    """
    left, top, box_width, box_height = (int(value) for value in rectangle)
    green = (0, 255, 0)

    # Bounding box around the object
    cv2.rectangle(frame, (left, top), (left + box_width, top + box_height), green, 3)

    # Label text above the box
    cv2.putText(frame, label, (left - 5, top - 25), cv2.FONT_HERSHEY_SIMPLEX, 1.5, green, 2)

    return frame

def detected_bird(frame, tracker, rectangle, detection_time, label, scores):
    """Handle a newly detected bird and return (scores_frame, tracker).

    Clears the tracked points, draws the rectangle on the frame, saves the
    detection event, builds the scores bar chart and initializes the tracker
    with the frame and the rectangle.
    """
    # A new object starts a new trail
    points.clear()

    # The sound alert is currently switched off:
    #winsound.PlaySound(soundPath, winsound.SND_FILENAME)

    # Mark the object on the frame, then store the event with that frame
    frame = drawing_rectangle(frame, rectangle, label)
    save_detection_event(frame, detection_time, label)

    # Bar chart with the scores of all labels
    chart = bar_chart(scores[0], labels)

    # Start tracking the detected object
    tracker.init(frame, rectangle)

    return chart, tracker
    
def tracking_manager(tracking_function_vraiables):
    """Update the tracker for the current frame and report the tracking status.

    Args:
        tracking_function_vraiables: sequence of three sequences:
            [0] (tracker, scores, labels),
            [1] (frame, frame_mask, tracking_mask),
            [2] (prediction_timer, counter_frames_reading, FPS).

    Returns:
        (frame, tracking_frame, tracker, tracking_on) where `frame` has the
        tracked box drawn on it while tracking is on, and `tracking_frame`
        is the trail frame built by drawing_tracking.
    """
    # Default status until the tracker confirms the object
    tracking_on = False

    # Extract the function arrays
    tracking_variables = tracking_function_vraiables[0]
    tracking_frames = tracking_function_vraiables[1]
    tracking_counter = tracking_function_vraiables[2]

    # Extract the function variables
    tracker, scores, labels = tracking_variables
    frame, frame_mask, tracking_mask = tracking_frames
    prediction_timer, counter_frames_reading, FPS = tracking_counter

    # Ask the tracker for the object's box in the current frame
    (success, contour_box) = tracker.update(frame)

    if success:

        # Coordinates of the rectangle around the object
        (x, y, w, h) = [int(a) for a in contour_box]

        # Only a box that lies completely inside the frame counts
        if 0 <= x and x+w <= WIDTH and 0 <= y and y+h <= HEIGH:

            tracking_on = True

            # The box area in the frame-difference mask (scaled coordinates)
            cut_fragment_track = tracking_mask[int(y/SCALE):int((y+h)/SCALE), int(x/SCALE):int((x+w)/SCALE)]

            # Every `tracking_check` frames verify that something still moves
            # inside the box; an all-zero mask means only background is left
            if counter_frames_reading % tracking_check == 0:
                if np.sum(cut_fragment_track) == 0:
                    tracking_on = False

            # Drawing bounding box on the current BGR frame
            cv2.rectangle(frame, (x, y), (x+w, y+h), (100, 255, 0), 2)

            # NOTE(review): `label` is not a local of this function, so the
            # module-level `label` is drawn; the third item unpacked above is
            # not used. Confirm which of the two is intended here.
            cv2.putText(frame, label, (x - 5, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)

    # Build the trail frame exactly once per call: drawing_tracking appends
    # the box center to the shared `points` queue, so a second call for the
    # same frame would store every center twice.
    tracking_frame = drawing_tracking(frame, contour_box, scores, tracking_on, prediction_timer, FPS + 1)

    return frame, tracking_frame, tracker, tracking_on

### Define the Windows and Information Functions ###

In [6]:
def save_detection_event(frame, detection_time, label):
    """Save the frame of a detection event and append the event to the log file.

    Args:
        frame: image written as '<label> <time>.jpg' into `outPutPath`.
        detection_time: time string; the callers in this file pass the
            format "dd/mm/YYYY HH:MM:SS".
        label: label of the detected object; when it is empty only the image
            is saved and nothing is written to the text file.
    """
    # Replace the separators at fixed positions so the time can be part of a
    # file name ("dd/mm/YYYY HH:MM:SS" becomes "dd-mm-YYYY HH-MM-SS")
    stamp = detection_time
    stamp = stamp[:2] + "-" + stamp[3:5] + "-" + stamp[6:13] + "-" + stamp[14:16] + "-" + stamp[17:]

    # Save the frame of the detection event
    image_name = label + ' ' + str(stamp) + '.jpg'
    cv2.imwrite(os.path.join(outPutPath, image_name), frame)

    # Without a label there is no event line to write
    if not 0 < len(label):
        return

    # Pad the label so all lines have the same length
    padded_label = str(label).ljust(len("airplane"))
    detection = "Detected " + padded_label + " at: " + detection_time

    # Append the detection event to the file in txtPath location
    with open(txtPath, 'a') as f:
        f.write(detection)
        f.write('\n')
        
def drawing_tracking(frame, contour_box, scores, tracking_on, prediction_timer, FPS):
    """Build the black frame that shows the trail of the tracked object.

    Args:
        frame: current frame; only its shape is used.
        contour_box: (x_min, y_min, box_width, box_height) of the object.
        scores: model scores; the highest value of scores[0] is printed.
        tracking_on: whether the object is currently tracked.
        prediction_timer: seconds the model prediction took.
        FPS: value printed as the frame rate.

    Returns:
        A uint8 frame shaped like `frame` with the trail and the texts.

    Side effects:
        Appends the box center to the module-level `points` queue and clears
        the queue when `tracking_on` is False.
    """
    # Coordinates of the rectangle around the object
    (x_min, y_min, box_width, box_height) = [int(a) for a in contour_box]

    # Current center of the bounding box becomes the newest trail point
    center = (int(x_min + box_width / 2), int(y_min + box_height / 2))
    points.appendleft(center)

    # Black background
    track_frame = np.zeros(frame.shape, np.uint8)

    # Visualizing tracker line
    for i in range(1, len(points)):

        # If no points collected yet
        if points[i - 1] is None or points[i] is None:
            continue

        # Draw the line between points
        cv2.line(track_frame, points[i - 1], points[i], (50, 200, 50), 2)

    # Adding text with center coordinates of the bounding box
    cv2.putText(track_frame, 'X: {0}'.format(center[0]), (FIRST_COL -25, FIRST_ROW +10),
                cv2.FONT_HERSHEY_SIMPLEX, FONT_SIZE*4, (255, 255, 255), 2, cv2.LINE_AA)

    cv2.putText(track_frame, 'Y: {0}'.format(center[1]), (FIRST_COL -25, FIRST_ROW + 50),
                cv2.FONT_HERSHEY_SIMPLEX, FONT_SIZE*4, (255, 255, 255), 2, cv2.LINE_AA)

    # Adding text with the time spent on the model prediction
    cv2.putText(track_frame, 'Time : ' + '{0:.3f}'.format(prediction_timer), (FIRST_COL -25, HEIGH -40),
                cv2.FONT_HERSHEY_SIMPLEX, FONT_SIZE*3, (255, 255, 255), 2, cv2.LINE_AA)

    # Adding text with the highest score of the prediction
    cv2.putText(track_frame, 'Score : ' + '{0:.3f}'.format(scores[0][np.argmax(scores)]), (FIRST_COL -25, HEIGH -10),
                cv2.FONT_HERSHEY_SIMPLEX, FONT_SIZE*3, (255, 255, 255), 2, cv2.LINE_AA)

    # Adding text with the FPS
    cv2.putText(track_frame, "FPS: " + str(FPS), (WIDTH -FIRST_COL*3, HEIGH -FIRST_ROW +10),
                cv2.FONT_HERSHEY_SIMPLEX, FONT_SIZE*4, (255, 255, 255), 3, cv2.LINE_AA)

    if tracking_on:

        # Adding text with tracking status on the frame
        cv2.putText(track_frame, 'Tracking ', (WIDTH -FIRST_COL*3, FIRST_ROW +20),
                    cv2.FONT_HERSHEY_TRIPLEX, FONT_SIZE*3, (50, 200, 50), 1, cv2.LINE_AA)

    else:
        # The frame is rebuilt on every call and the "Tracking" text is only
        # drawn while tracking is on, so there is no alert to erase here.

        # Blank the pixel of the last center, but only when it lies inside
        # the frame: the tracker can report a box whose center is outside,
        # and indexing with it would raise IndexError.
        center_x, center_y = center
        if 0 <= center_x < frame.shape[1] and 0 <= center_y < frame.shape[0]:
            track_frame[center_y, center_x] = 0

        # The trail ends together with the tracking
        points.clear()

    return track_frame
 
def info_frame(inf_frame, fps, counter_images_reading, counter_images_processing, counter_images_tracking, time, label):
    """Build the information frame of the main window.

    Args:
        inf_frame: frame whose shape is used for the new white frame.
        fps: camera FPS to print.
        counter_images_reading: number of frames read so far.
        counter_images_processing: number of processed images.
        counter_images_tracking: number of tracked images.
        time: time of the latest detection, passed on to plotting_detections.
        label: label of the latest detection, passed on to plotting_detections.

    Returns:
        A white frame with the app information and the detection events.
    """
    left_boundary = int(FIRST_COL -40)
    row_space = int(ROWS_SPACE/2) +5

    # Offsets of the clock and date texts from the frame borders
    shift_left = int(WIDTH/5) +10
    shift_down = int(HEIGH -FIRST_ROW)

    # Font size of all the information texts
    font_size = (FONT_SIZE*3)

    # Start from a white frame
    inf_frame = empty_frame(inf_frame)

    # Share of processed / tracked images in percent. Before the first frame
    # is counted there is nothing to divide by, so show 0% instead of raising
    # ZeroDivisionError.
    if counter_images_reading:
        processing_percentage = str(int((counter_images_processing * 100) / counter_images_reading))+"%"
        tracking_percentage = str(int((counter_images_tracking * 100) / counter_images_reading))+"%"
    else:
        processing_percentage = tracking_percentage = "0%"

    # Adding text with the current time
    cv2.putText(inf_frame, strftime("%H:%M:%S"), (WIDTH -shift_left, shift_down),
                cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), 1, cv2.LINE_AA)

    # Adding text with the current date
    cv2.putText(inf_frame, strftime("%d/%m/%Y"), (3*shift_left+20, shift_down +30),
                cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), 1, cv2.LINE_AA)

    # Adding text with the model name of the app
    cv2.putText(inf_frame, "Model: ResNet50", (left_boundary, FIRST_ROW),
                cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), 1, cv2.LINE_AA)

    # Adding text with the training dataset of the app
    cv2.putText(inf_frame, "Dataset: Cifar10", (left_boundary, FIRST_ROW +row_space),
                cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), 1, cv2.LINE_AA)

    # Adding text with the camera FPS
    cv2.putText(inf_frame, 'Camera FPS: ' + '{0:.0f}'.format(fps), (left_boundary, FIRST_ROW + (row_space*2)),
                cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), 1, cv2.LINE_AA)

    # Adding text with the reading frames counter
    cv2.putText(inf_frame, 'Reading Frames: ' + '{0:.0f}'.format(counter_images_reading),(left_boundary, FIRST_ROW +(row_space*3)),
                cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), 1, cv2.LINE_AA)

    # Adding text with the share of tracked images
    cv2.putText(inf_frame, 'Tracking Images: ' + tracking_percentage, (left_boundary, FIRST_ROW +(row_space*4)),
                cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), 1, cv2.LINE_AA)

    # Adding text with the share of processed images
    cv2.putText(inf_frame, 'Processing Images: ' + processing_percentage, (left_boundary, FIRST_ROW +(row_space*5)),
                cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), 1, cv2.LINE_AA)

    # First line of the detection events, below the information texts
    end_text_line = (FIRST_ROW +(row_space*6)) +15

    # Put the text of all detection events on the frame
    plotting_detections(inf_frame, label, time, end_text_line)

    return inf_frame

def plotting_detections(inf_frame, label, time, start_text_line):
    """Record the latest detection in `log` and draw all events on `inf_frame`.

    A non-empty `label` produces the line "Detected <label> at: <time>",
    which is appended to the module-level `log` unless it equals the last
    entry. All log entries are then drawn from `start_text_line` downwards;
    when the lower boundary is reached the text area is cleared to white and
    drawing restarts at `start_text_line`.
    """
    font_size = (FONT_SIZE*3) -0.1

    # Labels are padded to the length of the longest label
    max_len = len('airplane')

    if 0 < len(label):

        # A non-positive pad count leaves the label unchanged
        padded_label = str(label) + (max_len - len(label))*" "
        detection = "Detected " + padded_label + " at: " + str(time)

        if len(log) == 0:
            # First event ever
            log.append(detection)
        elif detection != log[-1]:
            # A new event: the elapsed time is computed but not used here
            detection_timer(time, log[-1])
            log.append(detection)

    # Vertical layout of the events text
    end_line = (HEIGH - FIRST_ROW -20)
    line_step = int(HEIGH/10) - 5
    line = start_text_line

    for event in log:

        # Out of room: clear the text area and restart at the first line
        if end_line <= line:
            inf_frame[start_text_line -20:end_line,:] = 255
            line = start_text_line

        # Adding text with DETECTION EVENT for current frame
        cv2.putText(inf_frame, event, (10, line), cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 255), 1, cv2.LINE_AA)

        # Skip to the next line
        line += line_step
        
# Function writes all the app's information into a text file
def write_info_txt(counter_frames_reading, counter_frames_tracking, counter_frames_processing, counter_images_processing,
                   counter_frames_predictions, counter_birds_predictions):
    """Append a summary of the run's counters and percentages to the text file at ``txtPath``.

    Parameters:
        counter_frames_reading: number of frames read; base of the tracking/processing percentages.
        counter_frames_tracking: number of frames counted in the tracking state.
        counter_frames_processing: number of frames counted in the detection state.
        counter_images_processing: number of images counted as processed.
        counter_frames_predictions: number of predictions; base of the right-predictions percentage.
        counter_birds_predictions: number of predictions counted as birds.

    Returns:
        None. The summary is appended to the file, so earlier content is kept.
    """

    def percent_of(part, total):
        # A zero total happens when no frame was read or no prediction was made;
        # report 0 instead of failing with ZeroDivisionError and losing the report
        return (part * 100) / total if total else 0.0

    counter_fail_predictions = counter_frames_predictions - counter_birds_predictions
    counter_frames_not_processing = counter_frames_processing - counter_images_processing

    # Tracking/processing percentages are truncated, the predictions percentage is rounded
    tracking_percent = int(percent_of(counter_frames_tracking, counter_frames_reading))
    processing_percent = int(percent_of(counter_frames_processing, counter_frames_reading))
    right_percent = '{0:.0f}'.format(percent_of(counter_birds_predictions, counter_frames_predictions))

    # Stores all the application's information in array and then writes them into a file
    app_info = [
        "\n###################### Application Information ##################\n",
        "counter frames reading: " + str(counter_frames_reading),
        "counter frames tracking: " + str(counter_frames_tracking),
        "counter frames processing: " + str(counter_frames_processing),
        "counter images processing: " + str(counter_images_processing),
        "counter frames predictons: " + str(counter_frames_predictions),
        "counter birds predictions: " + str(counter_birds_predictions),
        "counter fail predictions: " + str(counter_fail_predictions),
        "counter frames not processing: " + str(counter_frames_not_processing),
        " ",
        "Tracking Frames: " + str(tracking_percent) + "%",
        "Processing Frames: " + str(processing_percent) + "%",
        "Right Predictions: " + right_percent + "%",
        " ",
        "############################## END ################################\n\n",
    ]

    # Writing all application's information into a text file
    with open(txtPath, 'a') as f:
        f.write('\n'.join(app_info))

### Images Processing and Model Predictions ###

Frames are read from the camera one at a time, and movement is recognized in order to detect an object in the frame. The detected object is cut out of the original frame and resized to the model's input shape. The cropped object is sent to the model, which returns the predicted label together with its score. After the object is detected, we draw a green rectangle around it and start tracking it. We display to the user one window that is divided into six small windows:

1. The frame that we read from the camera, with the rectangle around the object and the label above it.
2. A window that contains information about the image processing, such as the model's name, detection events, time, etc.
3. A window that draws a line presenting the tracking of the detected object.
4. The frame of the object that we cut from the original frame and sent to the model.
5. A window that shows a graph with all the labels and their scores for the predicted object.
6. A window that displays the mask we use to detect objects and movement.


In [7]:
#### Loop reading frame by frame and processing them

# Tracking is applied on every N-th frame that was read (1 means every frame)
TRACK_EVERY_N_FRAMES = 1

# try/finally guarantees that the camera and the windows are released
# even when one of the processing functions raises an error in the middle of the loop
try:
    while True:

        # Capturing frames one-by-one from camera
        ret, frame = camera.read()

        # If the frame was not retrieved then we break the loop
        if not ret or frame is None:
            break

        # Increasing frames counter
        counter_frames_reading += 1

        # Reading the FPS property reported by the capture source
        fps = int(camera.get(cv2.CAP_PROP_FPS))

        # Function returns the pre-processed frames and masks, and the updated last frame
        frame, frame_rgb, tracking_mask, frame_mask, last_frame = preproccess_frames(frame, last_frame)

        # Quiet frames (empty tracking mask) are skipped: nothing is processed or displayed for them
        if not (0 < np.sum(tracking_mask)):
            continue

        # Increasing the counter that is handed to reader_manger
        counter_fps += 1

        # Increasing images counter
        counter_images_processing += 1

        # Start Tracking state
        if tracking_on:

            if counter_frames_reading % TRACK_EVERY_N_FRAMES == 0:

                # Increase tracking counter
                counter_frames_tracking += 1

                # Compress the function variables into one nested array
                tracking_variables = [tracker, scores, labels]
                tracking_frames    = [frame, frame_mask, tracking_mask]
                tracking_counter   = [prediction_timer, counter_frames_reading, FPS]
                tracking_args      = [tracking_variables, tracking_frames, tracking_counter]

                # Function manages the tracking part
                frame, tracking_frame, tracker, tracking_on = tracking_manager(tracking_args)

        # Start Detection state
        else:
            # Increase processing counter
            counter_frames_processing += 1

            # Compress function's variables into arrays
            trackers = [tracker]
            counters = [counter_frames_prediction, counter_birds_prediction]
            frames   = [frame, frame_mask, cut_fragment_rgb, scores_frame]

            # Function runs the detection and returns the updated frames, detection variables and counters
            frames, detected, counters = detection_manager(trackers, counters, frames)

            # Extract the variables that returned from detection_manager function
            frame, frame_mask, cut_fragment_rgb, scores_frame = frames
            label, scores, tracker, tracking_on, prediction_timer, detection_time = detected
            counter_frames_prediction, counter_birds_prediction = counters

        # Function return frame that contain the app stats and informations
        inf_frame = info_frame(inf_frame, fps, counter_frames_reading, counter_frames_processing,
                               counter_frames_tracking, detection_time, label)

        # Create one window that contain: upper_window and lower_window
        main_window = build_main_window(frame, inf_frame, tracking_frame, cut_fragment_rgb, scores_frame, frame_mask)

        # Function manage the frames reader variables
        # (the quit flag is not stored in a variable named `quit`, which would shadow the builtin)
        FPS, fps_start, counter_fps, tracking_on, quit_requested = reader_manger(FPS, fps_start, counter_fps, tracking_on)

        # Plotting all the frames in one window
        cv2.imshow("Main_Window", main_window)

        # If quit was requested we stop reading frames
        if quit_requested:
            break

finally:
    # Releasing camera
    camera.release()

    # Destroying all opened OpenCV windows
    cv2.destroyAllWindows()

# Function write all the app's information into a txt file
write_info_txt(counter_frames_reading, counter_frames_tracking, counter_frames_processing,
               counter_images_processing, counter_frames_prediction, counter_birds_prediction)

