<h1>Vertiefungsprojekt 1</h1>
<h1>Master of Science in Engineering, Profile Geomatics</h1>
<h1>Patrick Keusch</h1>

<h3>Source code partially from AIGuy, adaptations by Patrick Keusch</h3>
https://github.com/theAIGuysCode/yolov4-custom-functions
https://github.com/theAIGuysCode/yolov4-deepsort
    

In [None]:
import os
# comment out below line to enable tensorflow logging outputs
# (the variable is set before `import tensorflow` below, so keep this order)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import time
import tensorflow as tf
# Enable memory growth on the first GPU only, if at least one GPU is listed
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
from absl import app, flags, logging
from absl.flags import FLAGS
# project-local YOLOv4 helpers and configuration
import core.utils as utils
from core.yolov4 import filter_boxes
from tensorflow.python.saved_model import tag_constants
from core.config import cfg
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
# deep sort imports
from deep_sort import preprocessing, nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet
# video stabilization
from vidstab import VidStab
# NOTE(review): matplotlib.pyplot is already imported above; this second import is redundant
import matplotlib.pyplot as plt

In [None]:
### Set Parameters
# All tunable values of the notebook live in this cell; the cells below only read them.

# Set Framework for Tracking-Points Storage
# `deque` is imported from the public `collections` module (the private
# `_collections` module is a CPython implementation detail).
from collections import deque
# One bounded trajectory buffer per possible track id (index = track id)
pts = [deque(maxlen=3000) for _ in range(50000)]

# Set Parameters for Subsampling Import Frame
counter = []
n_width = 6 # Number of subsets horizontally
n_height = 4 # Number of subsets vertically
subset_x_start = 1750 # Only needed for single tile processing, Left top corner (x = Value horizontally)
subset_y_start = 500 # Only needed for single tile processing, Left top corner (y = Value vertically)
size_of_subset = 1000 # Dimension of Subsampling Frame, e.g. 416

frame_num = 0

# Create Empty Bounding-Boxes and Score-Arrays for Object Detection to start with
final_bboxes = np.empty((0,4)) # [min_x, miny, max_x, max_y]
final_scores = np.empty((0,1))

# Set Parameters for Object Detection
framework = 'tf' # tf for Tensorflow-Framework
weights = './checkpoints/yolov4-416' # Path to Weights-file, original pretrained weights
#weights = str('./checkpoints/yolov4-416-obj-2400') # Path to Weights-file, own training

size = 416 # Size of Input-Image of Network, e.g. 416 in case of yolo --> Targeted Resize Dimension

#Path to Video-Files
#video = str('./data/video/20210323-Schräg/Schräg-90m-45Grad-1.mov') # Path to Input-Video, '0' for Webcam, #Dimension 3840 x 2160
video = './data/video/20210408-Nadir/Nadir-90m-7.mov' # Path to Input-Video, '0' for Webcam, #Dimension 3840 x 2160
#video = str('./data/video/20210323-Schräg/Schräg-140m-45Grad-2.mov') # Path to Input-Video, '0' for Webcam, #Dimension 3840 x 2160
#video = str('./data/video/20210323-Schräg/Schräg-140m-60Grad-1.mov') # Path to Input-Video, '0' for Webcam, #Dimension 3840 x 2160
#video = str('./data/video/20210408-Nadir/Nadir-90m-5.mov') # Path to Input-Video, '0' for Webcam, #Dimension 3840 x 2160
#video = str(r'C:\Users\patri\Dropbox\FHNW\Geomatik_Privat\_MSc\VP1\03_Rohdaten\03_01_Aufnahmen\IGEO\Befliegungen_Nadir_BS-31-03-21\D1\DJI_0001.MP4') # Path to Input-Video, '0' for Webcam, #Dimension 3840 x 2160
#video = str('./data/video/20210323-Nadir/Nadir-140m-2.mov') # Path to Input-Video, '0' for Webcam, #Dimension 3840 x 2160



video_stabilized = './data/video//Output/Stabilized.avi' # Path to the stabilized copy of the input video
output_video_subframe = './data/video/Output/D1-DJI_0001-Detektion-Subframe.avi' # Path to Output-Video
output_video = './data/video/Output/D1-DJI_0001--Detektion.avi' # Path to Output-Video


bbox_output = './data/video/Output/D1-DJI_0001--Detektion-bbox_output.txt' # Path to BBox-Output


fps = 30.0 # Frames per Second for Output Video File
size_output = (size_of_subset,size_of_subset) # Frame size of the sub-frame output video
iou = 0.45 # IOU-Threshold, e.g. 0.45
score = 0.50 # Score-Threshold, e.g. 0.50

# Set Parameters for Tracking
# Definition of the parameters
max_cosine_distance = 0.9 # e.g. 0.4
nn_budget = None #e.g. None
nms_max_overlap = 1.0 # e.g. 1.0








In [None]:
def get_anchors(anchors_path, tiny=False):
    """Return the anchor values as an array of (width, height) pairs per scale.

    Args:
        anchors_path: flat sequence of anchor values (despite the name this is
            passed to ``np.array`` directly, not opened as a file).
        tiny: if True, the anchors are grouped into 2 scales instead of 3.
            Previously this flag was accepted but ignored.

    Returns:
        ``np.ndarray`` of shape ``(3, 3, 2)``, or ``(2, 3, 2)`` when ``tiny``
        is True.

    Raises:
        ValueError: if the number of values does not match the target shape.
    """
    anchors = np.array(anchors_path)
    # 3 anchors with 2 values each per scale; the tiny variant has 2 scales
    num_scales = 2 if tiny else 3
    return anchors.reshape(num_scales, 3, 2)

def read_class_names(class_file_name):
    """Map the zero-based line index of a class file to the class name on that line.

    Args:
        class_file_name: path of a text file with one class name per line.

    Returns:
        dict ``{line_index: name}``; only newline characters are stripped from
        each name.
    """
    with open(class_file_name, 'r') as class_file:
        return {index: line.strip('\n') for index, line in enumerate(class_file)}

def format_boxes(bboxes, image_height, image_width):
    """Convert normalized (ymin, xmin, ymax, xmax) boxes to pixel (xmin, ymin, width, height).

    Each box is overwritten in place; the same ``bboxes`` object is returned.

    Args:
        bboxes: iterable of mutable 4-element boxes with normalized coordinates.
        image_height: height used to scale the y coordinates.
        image_width: width used to scale the x coordinates.

    Returns:
        ``bboxes`` with every box replaced by ``[xmin, ymin, width, height]``.
    """
    for row in bboxes:
        # scale the vertical and horizontal edges to integer pixel positions
        top, bottom = (int(row[k] * image_height) for k in (0, 2))
        left, right = (int(row[k] * image_width) for k in (1, 3))
        row[0], row[1], row[2], row[3] = left, top, right - left, bottom - top
    return bboxes

In [None]:
# load configuration for object detector
# TF1-compat session configured so that GPU memory is allocated on demand
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)


# YOLO settings read from the project configuration (core.config.cfg)
STRIDES = np.array(cfg.YOLO.STRIDES)

# uses the notebook-local get_anchors / read_class_names defined above
ANCHORS = get_anchors(cfg.YOLO.ANCHORS, False)
XYSCALE = cfg.YOLO.XYSCALE
# number of classes = number of lines in the class-names file
NUM_CLASS = len(read_class_names(cfg.YOLO.CLASSES))

In [None]:
# initialize deep sort
model_filename = 'model_data/mars-small128.pb' # TF model for DeepSort
# encoder(frame, boxes) is used in the loops below to compute one feature per box
encoder = gdet.create_box_encoder(model_filename, batch_size=2)
# calculate cosine distance metric
metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
# initialize tracker
# NOTE: this single tracker instance is shared by all processing loops in the notebook
tracker = Tracker(metric)

# Load Object-Detection Model
saved_model_loaded = tf.saved_model.load(weights, tags=[tag_constants.SERVING])
# inference function; called with a batched image tensor in the loops below
infer = saved_model_loaded.signatures['serving_default']

In [None]:
# Stabilize Video and save to File "video_stabilized" (takes a while, not working at the moment)
# NOTE(review): the stabilized file is written but the capture cell below still opens `video`,
# not `video_stabilized` — confirm whether that is intended.
stabilizer = VidStab(kp_method='FAST', threshold=42, nonmaxSuppression=False)
stabilizer.stabilize(input_path = video, output_path = video_stabilized, border_type = 'black', border_size=100)

# Diagnostic plot of the stabilizer's trajectory
stabilizer.plot_trajectory()
plt.show()

# Diagnostic plot of the stabilizer's transforms
stabilizer.plot_transforms()
plt.show()



In [None]:
# begin video capture
vid = cv2.VideoCapture(video)
# Fail early with a clear message: without this check a wrong path only
# surfaces later as a cryptic error inside the processing loop.
if not vid.isOpened():
    raise IOError("Could not open input video: {}".format(video))

# get dimension of video input
width_input  = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))   # width
height_input = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))  # height


# initialize video save options
# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(output_video_subframe,fourcc, fps, size_output) # Output for only one subframe
out_a = cv2.VideoWriter(output_video,fourcc, fps, (width_input,height_input)) # Output of whole frame

# A writer that could not be opened (e.g. missing output folder) would
# otherwise drop every frame without any message.
for writer_path, writer in ((output_video_subframe, out), (output_video, out_a)):
    if not writer.isOpened():
        print("Warning: could not open output video for writing: {}".format(writer_path))

In [None]:
# Marker cell: reaching this line means all set-up cells above ran without raising
print("Init successful!")

<h1>Code for running through only one sub-frame tile</h1>

In [None]:
# Single-tile pipeline: for every video frame, run the detector on one fixed
# sub-frame (subset_x_start / subset_y_start / size_of_subset), track the
# detections with DeepSORT, display and write the annotated frames, and log the
# tracked bounding boxes (in main-frame pixel coordinates) to `bbox_output`.

# --- loop-invariant set-up (previously recomputed on every frame) ---
# read in all class names from config
class_names = utils.read_class_names(cfg.YOLO.CLASSES)

# by default allow all classes in .names file
allowed_classes = list(class_names.values())

# custom allowed classes (uncomment line below to restrict the tracker to these classes)
#allowed_classes = ['person', 'car', 'truck', 'bus', 'motorbike', 'bicycle']

# initialize color map
cmap = plt.get_cmap('tab20b')
colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

bbbox_output_file = open(bbox_output, "w") # Open File to store BBox-Coordinates
try:
    # While-loop = True
    running = True
    while running:
        # Capture frame-by-frame
        return_value, main_frame = vid.read()
        if not return_value:
            # No frame delivered (end of video or read error): stop cleanly
            # instead of crashing in cvtColor on a missing frame.
            print('No more frames could be read from the video, stopping.')
            break
        main_frame = cv2.cvtColor(main_frame, cv2.COLOR_BGR2RGB)
        main_frame_edit = main_frame
        # sub_frame is a slice of main_frame, so anything drawn on the
        # sub-frame is also visible in the main frame.
        sub_frame = main_frame[subset_y_start:subset_y_start + size_of_subset,
                               subset_x_start:subset_x_start + size_of_subset]

        frame_num +=1
        print('Frame #: ', frame_num)
        image_data = cv2.resize(sub_frame, (size, size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        batch_data = tf.constant(image_data)
        pred_bbox = infer(batch_data)
        # the first 4 values per prediction are the box, the rest the class confidences
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=500, #50
            max_total_size=500, #50
            iou_threshold=iou,
            score_threshold=score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = sub_frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)

        cv2.putText(sub_frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
        print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0) # [175. 619. 123.  77.] --> xmin, ymin, width, height
        scores = np.delete(scores, deleted_indx, axis=0) # [0.98072225 0.7607064]

        # encode yolo detections and feed to tracker
        features = encoder(sub_frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()  # sub-frame coordinates
            class_name = track.get_class()

            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            # Trajectories (kept and drawn in sub-frame coordinates)
            center = (int(((bbox[0]) + (bbox[2]))/2), int(((bbox[1])+(bbox[3]))/2))
            pts[track.track_id].append(center)
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j-1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = 2
                #thickness = int(np.sqrt(64/float(j+1))*2)
                cv2.line(sub_frame, (pts[track.track_id][j-1]), (pts[track.track_id][j]), color, thickness)

            # draw bbox on screen (shifted by the tile origin into main-frame coordinates)
            bbox_topleft = (subset_x_start + int(bbox[0]), subset_y_start + int(bbox[1]))
            bbox_bottomright = (subset_x_start +  int(bbox[2]), subset_y_start  + int(bbox[3]))
            bbox_topleft_fill = (subset_x_start + int(bbox[0]), subset_y_start + int(bbox[1]-30))
            bbox_bottomrigh_fill = (subset_x_start + int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, subset_y_start + int(bbox[1]))
            bbox_text_position = (subset_x_start + int(bbox[0]), subset_y_start + int(bbox[1]-10))
            cv2.rectangle(main_frame_edit, bbox_topleft,
                          bbox_bottomright, color, 2)
            cv2.rectangle(main_frame_edit, bbox_topleft_fill,
                          bbox_bottomrigh_fill, color, -1)
            cv2.putText(main_frame_edit, class_name + "-" + str(track.track_id),bbox_text_position,0, 0.75, (255,255,255),2)

            # Print and Store Details of BBox in Console and File
            bbox_main_frame = (bbox_topleft[0], bbox_topleft[1],
                               bbox_bottomright[0], bbox_bottomright[1])
            print("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                str(track.track_id), class_name, bbox_main_frame))
            bbbox_output_file.write(
                "Frame-Number: {}, Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {} \n".format(
                    frame_num, str(track.track_id), class_name, bbox_main_frame))

        # calculate frames per second of running detections
        # (kept in its own variable so the output-video `fps` parameter is not overwritten)
        elapsed = time.time() - start_time
        fps_measured = 1 / elapsed if elapsed > 0 else float('inf')
        print("FPS: %.2f" % fps_measured)
        result = cv2.cvtColor(sub_frame, cv2.COLOR_RGB2BGR)

        cv2.imshow("Output Video", result)

        #Show single subtile of main frame
        main_frame_edit = cv2.rectangle(main_frame_edit, (subset_x_start,subset_y_start),
                                        (subset_x_start + size_of_subset,subset_y_start + size_of_subset),
                                        (255,0,0), 5)
        main_frame_tile = np.asarray(main_frame_edit)
        main_frame_tile = cv2.cvtColor(main_frame_tile, cv2.COLOR_RGB2BGR)
        cv2.namedWindow("Main_Frame", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("Main_Frame", 1920,1080)
        cv2.imshow("Main_Frame", main_frame_tile)

        out.write(result)
        out_a.write(main_frame_tile)
        # press 'q' in an OpenCV window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            running = False
            break
finally:
    # Always close the windows and the log file, even if a frame raised
    cv2.destroyAllWindows()
    bbbox_output_file.close() # Close BBox-Text-File




<h1>Loop for running through all sub-frame tiles</h1>

In [None]:
# All-tiles pipeline: split every video frame into a grid of square tiles,
# run the detector on each tile, shift the tile detections into main-frame
# coordinates, track the combined detections with DeepSORT, display and write
# the annotated frames, and log the tracked bounding boxes to `bbox_output`.

# --- loop-invariant set-up (previously recomputed for every tile) ---
# read in all class names from config
class_names = utils.read_class_names(cfg.YOLO.CLASSES)

# by default allow all classes in .names file
#allowed_classes = list(class_names.values())

# custom allowed classes
allowed_classes = ['person', 'car', 'truck', 'bus', 'motorbike', 'bicycle']

# initialize color map
cmap = plt.get_cmap('tab20b')
colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

bbbox_output_file = open(bbox_output, "w") # Open File to store BBox-Coordinates
try:
    # While-loop = True
    running = True
    while running:
        # Capture frame-by-frame
        return_value, main_frame = vid.read()
        if not return_value:
            # No frame delivered (end of video or read error): stop cleanly
            # instead of crashing in cvtColor on a missing frame.
            print('No more frames could be read from the video, stopping.')
            break
        main_frame = cv2.cvtColor(main_frame, cv2.COLOR_BGR2RGB)
        main_frame_edit = main_frame
        frame_num +=1
        print('Frame #: ', frame_num)
        # time the whole frame (all tiles + tracking), not only the last tile
        start_time = time.time()

        # Detections summed up over all tiles of THIS frame. They are reset on
        # every frame; previously they were created once (seeded with a dummy
        # 'person' box) and kept growing, so the tracker received the
        # detections of all earlier frames again.
        bboxes_all = np.empty((0, 4), dtype='float32')
        scores_all = np.empty((0,), dtype='float32')
        names_all = np.empty((0,), dtype=str)

        # Tile indices start at 0 so the first tile row/column is processed too
        for u in range(n_width):
            for v in range(n_height):
                tile = main_frame[v * size_of_subset:(v+1) * size_of_subset,
                                  u * size_of_subset:(u+1) *  size_of_subset]
                if tile.size == 0:
                    # grid cell lies completely outside the frame
                    continue
                # sub_frame is a slice of main_frame, so anything drawn on it
                # is also visible in the main frame.
                sub_frame = tile

                image_data = cv2.resize(sub_frame, (size, size))
                image_data = image_data / 255.
                image_data = image_data[np.newaxis, ...].astype(np.float32)

                batch_data = tf.constant(image_data)
                pred_bbox = infer(batch_data)
                # the first 4 values per prediction are the box, the rest the class confidences
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

                boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                    boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                    scores=tf.reshape(
                        pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                    max_output_size_per_class=500, #50
                    max_total_size=500, #50
                    iou_threshold=iou,
                    score_threshold=score)

                # convert data to numpy arrays and slice out unused elements
                num_objects = valid_detections.numpy()[0]
                bboxes = boxes.numpy()[0]
                bboxes = bboxes[0:int(num_objects)]
                scores = scores.numpy()[0]
                scores = scores[0:int(num_objects)]
                classes = classes.numpy()[0]
                classes = classes[0:int(num_objects)]

                # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
                original_h, original_w, _ = sub_frame.shape
                bboxes = utils.format_boxes(bboxes, original_h, original_w)

                # loop through objects and use class index to get class name, allow only classes in allowed_classes list
                names = []
                deleted_indx = []
                for i in range(num_objects):
                    class_indx = int(classes[i])
                    class_name = class_names[class_indx]
                    if class_name not in allowed_classes:
                        deleted_indx.append(i)
                    else:
                        names.append(class_name)
                names = np.array(names, dtype=str)
                count = len(names)

                cv2.putText(sub_frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
                print("Objects being tracked: {}".format(count))
                # delete detections that are not in allowed_classes
                bboxes = np.delete(bboxes, deleted_indx, axis=0) # [175. 619. 123.  77.] --> xmin, ymin, width, height
                scores = np.delete(scores, deleted_indx, axis=0) # [0.98072225 0.7607064]
                print("Bboxes: " + str(bboxes))
                print("Scores: " + str(scores))

                # Translate from sub-frame to main-frame coordinates: only
                # xmin/ymin move by the tile origin, width/height stay as is
                dummy_bboxes = [(u) * size_of_subset, (v) * size_of_subset, 0, 0]
                bboxes_temp = np.add (bboxes,dummy_bboxes)
                print("dummy_bboxes: " + str(dummy_bboxes))
                print("Bboxes_temp: " + str(bboxes_temp))

                # Stack sub-frames detections to main frame detections
                bboxes_all = np.vstack((bboxes_all,bboxes_temp))
                scores_all = np.hstack((scores_all,scores))
                names_all = np.hstack((names_all,names))

                print("u: " + str(u))
                print("v: " + str(v))
                print("Bboxes_all: " + str(bboxes_all))
                print("Scores_all: " + str(scores_all))
                print("Names_all: " + str(names_all))

        # encode yolo detections and feed to tracker
        features = encoder(main_frame, bboxes_all)
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature
                      in zip(bboxes_all, scores_all, names_all, features)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()  # main-frame coordinates
            class_name = track.get_class()
            print("bbox: " + str(bbox))

            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            # Trajectories
            center = (int(((bbox[0]) + (bbox[2]))/2), int(((bbox[1])+(bbox[3]))/2))
            pts[track.track_id].append(center)
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j-1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = 2
                #thickness = int(np.sqrt(64/float(j+1))*2)
                cv2.line(main_frame, (pts[track.track_id][j-1]), (pts[track.track_id][j]), color, thickness)

            # draw bbox on screen
            bbox_topleft = (int(bbox[0]),int(bbox[1]))
            bbox_bottomright = (int(bbox[2]), int(bbox[3]))
            bbox_topleft_fill = (int(bbox[0]),int(bbox[1]-30))
            bbox_bottomrigh_fill = (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17,int(bbox[1]))
            bbox_text_position = (int(bbox[0]),int(bbox[1]-10))
            cv2.rectangle(main_frame_edit, bbox_topleft,
                          bbox_bottomright, color, 2)
            cv2.rectangle(main_frame_edit, bbox_topleft_fill,
                          bbox_bottomrigh_fill, color, -1)
            cv2.putText(main_frame_edit, class_name + "-" + str(track.track_id),bbox_text_position,0, 0.75, (255,255,255),2)

            # Print and Store Details of BBox in Console and File
            bbox_main_frame = (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))
            print("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                str(track.track_id), class_name, bbox_main_frame))
            bbbox_output_file.write(
                "Frame-Number: {}, Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {} \n".format(
                    frame_num, str(track.track_id), class_name, bbox_main_frame))

        # calculate frames per second of running detections
        # (kept in its own variable so the output-video `fps` parameter is not overwritten)
        elapsed = time.time() - start_time
        fps_measured = 1 / elapsed if elapsed > 0 else float('inf')
        print("FPS: %.2f" % fps_measured)
        # sub_frame is the last non-empty tile processed above
        result = cv2.cvtColor(sub_frame, cv2.COLOR_RGB2BGR)

        cv2.imshow("Output Video", result)

        #Show single subtile of main frame
        # NOTE(review): this marks the single-tile region (subset_x_start/subset_y_start),
        # which is not one of the grid tiles — confirm it is still wanted here.
        main_frame_edit = cv2.rectangle(main_frame_edit, (subset_x_start,subset_y_start),
                                        (subset_x_start + size_of_subset,subset_y_start + size_of_subset),
                                        (255,0,0), 5)
        main_frame_tile = np.asarray(main_frame_edit)
        main_frame_tile = cv2.cvtColor(main_frame_tile, cv2.COLOR_RGB2BGR)
        cv2.namedWindow("Main_Frame", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("Main_Frame", 1920,1080)
        cv2.imshow("Main_Frame", main_frame_tile)

        # NOTE(review): `out` was created with size_output; a border tile smaller
        # than that may not be written correctly — confirm.
        out.write(result)
        out_a.write(main_frame_tile)
        # press 'q' in an OpenCV window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            running = False
            break
finally:
    # Always close the windows and the log file, even if a frame raised
    cv2.destroyAllWindows()
    bbbox_output_file.close() # Close BBox-Text-File




In [None]:
            
            """
            B = np.array([[u*size_of_subset],[v*size_of_subset],[u*size_of_subset],[v*size_of_subset]])
            B = np.transpose(B, axes = None)
            
            #print(bboxes.shape)
            #print(B.shape)
            
            bboxes = bboxes + B[:None]
            
            #Create final 
            final_bboxes = np.append(final_bboxes,bboxes)
            final_scores = np.append(final_scores,scores)
            bbox = final_bboxes
            """

    
"""
    # Our operations on the frame come here
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Display the resulting frame
    cv2.imshow('frame',gray)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
"""

In [None]:
# Disabled per-tile tracking variant, kept for reference only. It used to be a
# bare triple-quoted string, which the notebook evaluates and echoes as the
# cell output; it is a real comment block now.
# NOTE(review): the first cv2.rectangle call below references `bbbox[3]`
# (three b's), which looks like a typo for `bbox[3]`.
#
#           # encode yolo detections and feed to tracker
#            features = encoder(frame, bboxes)
#            detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]
#
#            #initialize color map
#            cmap = plt.get_cmap('tab20b')
#            colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
#
#            # run non-maxima supression
#            boxs = np.array([d.tlwh for d in detections])
#            scores = np.array([d.confidence for d in detections])
#            classes = np.array([d.class_name for d in detections])
#            indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
#            detections = [detections[i] for i in indices]
#            print(boxs)
#
#            # Call the tracker
#            tracker.predict()
#            tracker.update(detections)
#
#            # update tracks
#            for track in tracker.tracks:
#                if not track.is_confirmed() or track.time_since_update > 1:
#                    continue
#                bbox = track.to_tlbr()
#                class_name = track.get_class()
#
#                color = colors[int(track.track_id) % len(colors)]
#                color = [i * 255 for i in color]
#
#            #Trajectories
#                center = (int(((bbox[0]) + (bbox[2]))/2), int(((bbox[1])+(bbox[3]))/2))
#                pts[track.track_id].append(center)
#                for j in range(1, len(pts[track.track_id])):
#                    if pts[track.track_id][j-1] is None or pts[track.track_id][j] is None:
#                        continue
#                    thickness = 2
#                    #thickness = int(np.sqrt(64/float(j+1))*2)
#                    cv2.line(sub_frame, (pts[track.track_id][j-1]), (pts[track.track_id][j]), color, thickness)
#
#            # draw bbox on screen
#                print(bbox)
#                cv2.rectangle(sub_frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbbox[3])), color, 2)
#                cv2.rectangle(sub_frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
#                cv2.putText(sub_frame, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
#
#            # if enable info flag then print details about each track
#                print("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
#
#            # calculate frames per second of running detections
#            fps = 1 / (time.time() - start_time) #1.0
#            print("FPS: %.2f" % fps)
#            result = np.asarray(sub_frame)
#            #result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

In [None]:
# When everything is done, release the capture and both video writers
vid.release()
# The writers were opened in the capture/writer set-up cell but never closed;
# release them so the output video files are closed properly.
out.release()
out_a.release()
cv2.destroyAllWindows()

<h1>Save</h1>

In [None]:
bbbox_output_file = open(bbox_output, "w") # Open File to store BBox-Coordinates
# While-loop = True
running = True
while running:
    # Capture frame-by-frame
    return_value, main_frame = vid.read()
    main_frame = cv2.cvtColor(main_frame, cv2.COLOR_BGR2RGB)
    main_frame_edit = main_frame
    sub_frame = main_frame[subset_y_start:subset_y_start + size_of_subset,
                           subset_x_start:subset_x_start + size_of_subset]

    

    
    frame_num +=1
    print('Frame #: ', frame_num)
    image_data = cv2.resize(sub_frame, (size, size))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    start_time = time.time()

    batch_data = tf.constant(image_data)
    pred_bbox = infer(batch_data)
    for key, value in pred_bbox.items():
        boxes = value[:, :, 0:4]
        pred_conf = value[:, :, 4:]
    
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
    boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
    scores=tf.reshape(
        pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
    max_output_size_per_class=500, #50
    max_total_size=500, #50
    iou_threshold=iou,
    score_threshold=score)
                        

    # convert data to numpy arrays and slice out unused elements
    num_objects = valid_detections.numpy()[0]
    bboxes = boxes.numpy()[0]
    bboxes = bboxes[0:int(num_objects)]
    scores = scores.numpy()[0]
    scores = scores[0:int(num_objects)]
    classes = classes.numpy()[0]
    classes = classes[0:int(num_objects)]
    

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
    original_h, original_w, _ = sub_frame.shape
    bboxes = utils.format_boxes(bboxes, original_h, original_w)
    
    # store all predictions in one parameter for simplicity when calling functions
    pred_bbox = [bboxes, scores, classes, num_objects]
    
    # Map class indices to class names using the names file from the config.
    # NOTE(review): this is re-read on every loop iteration; it looks
    # loop-invariant and could be hoisted above the loop.
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    # By default every class listed in the .names file is tracked.
    allowed_classes = list(class_names.values())

    # custom allowed classes (uncomment line below to customize tracker for only people)
    #allowed_classes = ['person', 'car', 'truck', 'bus', 'motorbike', 'bicycle']

    # Translate each detection's class index into its name. Detections whose
    # class is not allowed are remembered by index so they can be dropped below.
    names = []
    deleted_indx = []
    for i in range(num_objects):
        class_indx = int(classes[i])
        class_name = class_names[class_indx]
        if class_name in allowed_classes:
            names.append(class_name)
        else:
            deleted_indx.append(i)
    names = np.array(names)
    count = len(names)

    # Drop the detections that are not in allowed_classes so that bboxes and
    # scores stay aligned with `names`.
    bboxes = np.delete(bboxes, deleted_indx, axis=0)
    scores = np.delete(scores, deleted_indx, axis=0)

    # Encode the YOLO detections for the tracker. This runs BEFORE anything is
    # drawn into sub_frame: the encoder receives the frame together with the
    # boxes (presumably cropping the box regions to build appearance
    # features - confirm against the encoder's definition), so overlay text
    # drawn earlier would end up inside the descriptors of overlapping boxes.
    features = encoder(sub_frame, bboxes)
    detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

    # Only now burn the object count into the sub-frame and report it.
    cv2.putText(sub_frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
    print("Objects being tracked: {}".format(count))
    
    # Colour palette: 20 evenly spaced samples of the 'tab20b' colormap, keeping
    # only the first three channels of each sample.
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(position)[:3] for position in np.linspace(0, 1, 20)]

    # Non-maxima suppression: gather boxes, confidences and class names of all
    # detections, then keep only the detections whose indices are returned.
    boxs = np.array([det.tlwh for det in detections])
    scores = np.array([det.confidence for det in detections])
    classes = np.array([det.class_name for det in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
    detections = [detections[keep] for keep in indices]
    
    # Advance the tracker, then feed it the detections of this frame.
    tracker.predict()
    tracker.update(detections)

    # Draw and log every track that is confirmed and was updated recently.
    for track in tracker.tracks:
        if not (track.is_confirmed() and track.time_since_update <= 1):
            continue
        bbox = track.to_tlbr()
        class_name = track.get_class()

        # Pick a palette entry by track id and scale it from 0..1 to 0..255.
        color = [channel * 255 for channel in colors[int(track.track_id) % len(colors)]]

        # Trajectory: remember the box centre (sub-frame coordinates) and
        # redraw the whole stored path of this track onto the sub-frame.
        center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))
        pts[track.track_id].append(center)
        trail = list(pts[track.track_id])
        for seg_start, seg_end in zip(trail, trail[1:]):
            if seg_start is None or seg_end is None:
                continue
            thickness = 2
            #thickness = int(np.sqrt(64/float(j+1))*2)
            cv2.line(sub_frame, seg_start, seg_end, color, thickness)

        # Box corners shifted by the subset origin, i.e. expressed in the
        # coordinates of the main frame.
        x_min = subset_x_start + int(bbox[0])
        y_min = subset_y_start + int(bbox[1])
        x_max = subset_x_start + int(bbox[2])
        y_max = subset_y_start + int(bbox[3])

        # Box outline, a filled label background above it, and the label text.
        bbox_topleft = (x_min, y_min)
        bbox_bottomright = (x_max, y_max)
        bbox_topleft_fill = (x_min, subset_y_start + int(bbox[1]-30))
        # Label width: 17 px per character of class name plus track id.
        bbox_bottomrigh_fill = (x_min + (len(class_name) + len(str(track.track_id))) * 17, y_min)
        bbox_text_position = (x_min, subset_y_start + int(bbox[1]-10))
        cv2.rectangle(main_frame_edit, bbox_topleft, bbox_bottomright, color, 2)
        cv2.rectangle(main_frame_edit, bbox_topleft_fill, bbox_bottomrigh_fill, color, -1)
        cv2.putText(main_frame_edit, class_name + "-" + str(track.track_id), bbox_text_position, 0, 0.75, (255,255,255), 2)

        # Report the track on the console and append the same record, prefixed
        # with the frame number, to the bounding-box output file.
        log_fields = (str(track.track_id), class_name, (x_min, y_min, x_max, y_max))
        print("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(*log_fields))
        bbbox_output_file.write("Frame-Number: "+ str(frame_num)+", Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {} \n".format(*log_fields))
    
    # calculate frames per second of running detections
    fps = 1 / (time.time() - start_time) #1.0
    print("FPS: %.2f" % fps)
    result = np.asarray(sub_frame)
    result = cv2.cvtColor(sub_frame, cv2.COLOR_RGB2BGR)

    

    cv2.imshow("Output Video", result)

    #Show single subtile of main frame
    main_frame_edit = cv2.rectangle(main_frame_edit, (subset_x_start,subset_y_start),
                                    (subset_x_start + size_of_subset,subset_y_start + size_of_subset),
                                    (255,0,0), 5) 
    main_frame_tile = np.asarray(main_frame_edit)
    main_frame_tile = cv2.cvtColor(main_frame_tile, cv2.COLOR_RGB2BGR)
    cv2.namedWindow("Main_Frame", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("Main_Frame", 1920,1080)
    cv2.imshow("Main_Frame", main_frame_tile)
    
    
    
    out.write(result)
    out_a.write(main_frame_tile)
    if cv2.waitKey(1) & 0xFF == ord('q'): 
        running = False
        break
# Finalize the output videos before tearing down the windows. Without an
# explicit release the files may be left incomplete.
# NOTE(review): assumes `out` and `out_a` are cv2.VideoWriter objects created
# earlier in the notebook - confirm. The input capture object (not visible in
# this section) should be released here as well.
out.release()
out_a.release()
cv2.destroyAllWindows()
bbbox_output_file.close() # Close BBox-Text-File


