# Deep Learning for viticulture (Main System)

### Imports

In [None]:
import os
#os.chdir('..')
print(os.getcwd())
import tensorflow as tf
from research.object_detection.utils import label_map_util
import cv2
import pandas as pd
import xml.etree.ElementTree as ET
import numpy as np
from research.object_detection.utils import visualization_utils as vis_util
from collections import deque

## Choose the object detection model to be used
* set up the paths for the used model
* including: model name, path to the checkpoint file (frozen inference graph), and path to the label map

In [None]:
# Model selection and file locations.
# Expected layout: vine_models/<MODEL_NAME>/ contains both
# frozen_inference_graph.pb and vine_label_map.pbtxt.

# Name of the model directory to use.
MODEL_NAME = 'faster_rcnn_resnet101_coco'

# PATH_TO_CKPT:   frozen detection graph, i.e. the model that performs the detection.
# PATH_TO_LABELS: label map used to attach the correct class label to each box.
PATH_TO_CKPT, PATH_TO_LABELS = (
    os.path.join('vine_models', MODEL_NAME, file_name)
    for file_name in ('frozen_inference_graph.pb', 'vine_label_map.pbtxt')
)

# Upper bound on the number of classes taken from the label map.
NUM_CLASSES = 3

In [None]:
# Load the frozen TensorFlow model into memory.
detection_graph = tf.Graph()

# Read the serialized GraphDef from disk ...
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    serialized_graph = fid.read()

# ... parse it into a GraphDef protobuf ...
od_graph_def = tf.GraphDef()
od_graph_def.ParseFromString(serialized_graph)

# ... and import it into the dedicated detection graph.
with detection_graph.as_default():
    # name='' imports the nodes without a name prefix, so tensors can be
    # looked up later by names such as 'image_tensor:0'.
    tf.import_graph_def(od_graph_def, name='')

In [None]:
# Load the label map and derive the category lookup from it.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)

# Keep at most NUM_CLASSES categories and use their display names.
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)

# NOTE(review): presumably a dict keyed by class id - confirm against label_map_util.
category_index = label_map_util.create_category_index(categories)

## Setup Video for the Process
* fill in the path to the video, or just its name if it is in the same directory as the script (without the `.MP4` extension, which is appended by the code)
* with cap.set you can start at a specific MSEC Position in the video


In [None]:
# Open the input video; the ".MP4" extension is appended to video_name.
video_name = 'video_2'
cap = cv2.VideoCapture("{}.MP4".format(video_name))

# Start reading at the 2000 ms position instead of the first frame.
cap.set(cv2.CAP_PROP_POS_MSEC, 2000)


## Detection of the movement direction of the video
* initial short application of the object detection model on the video in order to identify the movement direction of the video
* based on the displacement of detected objects over several frames, the movement direction is derived
* when a certain threshold is met for the detection the direction gets identified

In [None]:
#Movement Detection
# Runs the detection model on consecutive frames and votes on the travel
# direction: for every frame the smallest x_max among the confident class-1
# boxes is compared with the value of the previous evaluated frame. A larger
# value is a vote for "right", a smaller one a vote for "left". The loop stops
# once the vote balance reaches +/-10 or the video ends.
#
# Result: movement_detected ("right" or "left") and movement_counter.

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        # variables to determine the movement direction
        # None = no reference position yet; the first evaluated frame must not
        # be compared against an arbitrary start value (it would always vote "right")
        last_movement_label_xmax = None
        movement_counter = 0

        # Open the video file
        while cap.isOpened():
            ret, image_np = cap.read()
            if not ret:
                # end of the video or read error: stop instead of feeding
                # an empty frame to the model
                break

            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Predict the boxes with class and score
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})

            # Create a dict with information about predicted boxes
            # (box layout per row: y_min, x_min, y_max, x_max)
            data = {'y_min': boxes[0][:, 0],
                    'x_min': boxes[0][:, 1],
                    'y_max': boxes[0][:, 2],
                    'x_max': boxes[0][:, 3],
                    'score': scores[0],
                    'class': classes[0]}

            # Store prediction dict in pandas DataFrame and keep only the
            # confident class-1 boxes; reset_index() returns a fresh frame, so
            # no slice of df is modified in place
            df = pd.DataFrame(data=data, index=np.arange(0, len(boxes[0]), 1))
            relevant_movement_label = df.loc[(df['class'] == 1) & (df['score'] > 0.7)].reset_index()

            # A frame without a usable detection carries no direction
            # information. Its minimum would be NaN, which compares False
            # against everything and would turn every following frame into a
            # "left" vote - skip it instead.
            if relevant_movement_label.empty:
                continue

            # Get minimal xmax as comparison value
            current_movement_label_xmax = float(round(relevant_movement_label.loc[:, 'x_max'].min(), 10))

            if last_movement_label_xmax is not None:
                # Larger minimal xmax than before -> vote "right", smaller -> vote "left".
                # Voting over many frames irons out mistakes caused by unsteady
                # camera movement; an unchanged position (not moving frame)
                # casts no vote.
                if current_movement_label_xmax > last_movement_label_xmax:
                    movement_counter = movement_counter + 1
                elif current_movement_label_xmax < last_movement_label_xmax:
                    movement_counter = movement_counter - 1
                print(movement_counter)

            last_movement_label_xmax = current_movement_label_xmax

            if abs(movement_counter) >= 10:
                break

if abs(movement_counter) < 10:
    # the video ended before the vote was decisive; fall back to the majority so far
    print("movement vote did not reach the threshold, using current balance: " + str(movement_counter))

# positive balance -> "right"; "left" remains the default for a balance of zero or below
movement_detected = "right" if movement_counter > 0 else "left"
print("identified movement direction: " + str(movement_detected))

# Start the video processing

In [None]:
# Restart the video for the main processing pass.
# The capture used by the movement detection has already consumed frames,
# so it is released and the same file is opened again.
cap.release()
cap = cv2.VideoCapture("{}.MP4".format(video_name))

# start the processing at a specific msec position in the video
cap.set(cv2.CAP_PROP_POS_MSEC, 2000)


### Configuration of Filter Values
* change the filter parameters for the post filtering if needed
* this might be needed for videos with a different camera angle

In [None]:
# Post-filter thresholds; adapt them if the camera angle differs.

# A box is kept only if  y_max > y_threshold  (ignores vines of the second row)
# and  y_min < y2_threshold.
y_threshold, y2_threshold = 0.7, 0.8

# Search field: a box is kept only if the y-coordinate of its center lies
# between the upper and the lower border, i.e. between 50% and 80% of the
# image height. This focuses the tracking on the vine area.
search_field_upper_border, search_field_lower_border = 0.5, 0.8

### Initiate Variables

In [None]:
# Width and height of the used video (e.g. 1920 x 1080 or 1280 x 720).
# Important for the visualization of the object tracking,
# not mandatory for the actual functionality.
w, h = 1920, 1080

# Result of the movement detection; "right" = vine moves from left to right.
movement = movement_detected

# ---- Tuning constants ----
exit_zone_left, exit_zone_right = 0.08, 0.92   # exit zones for entities
buffer = 20                  # size (maxlen) of the deque object of each entity
count_threshold = 10         # how many times an object has to be tracked in order to be counted

# ---- Per-entity state: parallel lists, one slot per entity ----
entity_list = []             # the entity deque objects
last_time_modified = []      # last frame in which the entity was modified
current_speed = []           # current speed of the entity
is_active = []               # Boolean: is the entity active?
expected_next_center = []    # calculated expected center of the entity
is_entity_counted = []       # Boolean: enables counting a tracked vine only once
entity_distance = []         # distance of the entity to the next entity
entity_MSEC = []             # MSEC position of the entity when it gets deactivated

# ---- Frame-level state ----
frame_counter = 0            # current frame number
active_entities = []         # all currently active entities
vine_counter = 0             # how many vines were detected at the current frame
avg_speed = 0
average_movement_speed = 0   # average speed of all active entities




### OBJECT DETECTION / POST FILTERING / OBJECT TRACKER / OBJECT COUNTER

In [None]:
#deploy the model on each frame
with detection_graph.as_default():
  with tf.Session(graph=detection_graph) as sess:
    
    #<---------------------------------------------- START OF: OBJECT DETECTION   ------------------------------------------>
    
    ############################################ START OF: PREPARE THE MODEL> #######################################
    # Definite input and output Tensors for detection_graph
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # get detection-boxes
    # Each box represents a part of the image where a particular object was detected.
    # for this given use case detection-boxes can be vines, woodensticks, or metalsticks (trained classes of the model)
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    ############################################ END OF: PREPARE THE MODEL> ########################################
    
    
    ############################################ START OF: RUN THE MODEL> ##########################################
    
    while(cap.isOpened()):
      #returns numpy.ndarray with shape (1080, 1920, 3) for each videoframe
      #from now on every further step is always applied to a single frame
      (ret, image_np) = cap.read() 
      if not ret:
            break
      
      # increment the frame_counter variable for each frame in order to save the current frame number     
      frame_counter += 1
      # also save frame counter as tuple -> used for speed normalization later on
      frame_counter_tuple = (frame_counter,) 
        
      # Expand dimensions since the model expects input to have shape (1,1080,1920,3) given full hd images
      image_np_expanded = np.expand_dims(image_np, axis=0)
      
      # run the detection model
      # it returns the x and y coordinates for the boxes, the score, the classes and num  
      (boxes, scores, classes, num) = sess.run(
          [detection_boxes, detection_scores, detection_classes, num_detections],
          feed_dict={image_tensor: image_np_expanded}) 
      

      # get all coordinates, score and class of each detection-boxes for the class = 0
      # in this case class 0 = vine, class 1 = woodenstick, class 2 = metalstick
      data = {'y_min': [boxes[0][i][0] for i in range(len(boxes[0]))],
                'x_min': [boxes[0][i][1] for i in range(len(boxes[0]))],
                'y_max': [boxes[0][i][2] for i in range(len(boxes[0]))],
                'x_max': [boxes[0][i][3] for i in range(len(boxes[0]))],
                'score': scores[0], 
                'class': classes[0]}
  
      # create a dataframe which contains all detection-boxes for the label/class vine of the current frame
      df = pd.DataFrame(data=data, index=np.arange(0, len(boxes[0]), 1))
        
      ############################################ END OF: RUN THE MODEL> ################################################
    
      #<---------------------------------------------- END OF: OBJECT DETECTION   ------------------------------------------>
        
       
    
    
        
       
      #<---------------------------------------------- START OF: POST FILTERING   ------------------------------------------>   
          
      ############################################ START OF: POST FILTER 1> ################################################
    
      # only use labels which have a score above 70%
      # only use labels whose y-max coordinates are above y_threshold (see filter setup above)
      # (optional:  only use labels whose y-min coordinates are below y2_threshold (see filter setup above))
      # relevant_labels contains all labels that met these requirements 
      relevant_labels = df.loc[(df['score'] > 0.7) & (df['class'] == 1.0) & (df['y_max'] > y_threshold) & (df['y_min'] < y2_threshold) ]
    
      # contains all labels which met the first filter criteria
      labels_df = relevant_labels.values                                          
      ############################################## END OF: POST FILTER 1> ################################################
        
    
      ############################################ START OF: POST FILTER 2> ################################################
        
      # create labels list which will contain all labels which met this first and second filter criteria
      # this label list is created for each frame  
      labels = []
      # get values for each detection-box
      for i in range(0,len(labels_df)):
            score = (labels_df[i][1])
            x_max = (labels_df[i][2])
            x_min = (labels_df[i][3])
            y_max = (labels_df[i][4])
            y_min = (labels_df[i][5])
     
            # calculate the center y-coordinate of a detection box
            center_y = (y_min+(y_max-y_min)/2)
            
            ## SEARCH FIELD CHEKCER ##
            # only keep an detection-box if its center y-coordinate lies between a given range (upper/lower_seach_field)
            # calculate the center of each detection box
            center = (float(x_min + (x_max-x_min)/2),float(y_min+(y_max-y_min)/2)) #is tuple
            if search_field_lower_border > center_y > search_field_upper_border :  
                # if a label meets this criteria, it is added to the labels list as following:
                # labels= [assigned, center(x,y), y_min, x_min, y_max, x_max]
                # assigned = False means, that is has not beed assigned to an entity yet
                # this assigned status will be used for the object racking method later on
                data2 = [False, center, y_min, x_min, y_max, x_max,score]          
                labels.append(data2)
            else:
                print("an detection-box did not met the filter criteria 2")
                
    ############################################## END OF: POST FILTER 2> ################################################ 
    
    ############################################ START OF: POST FILTER 3> ################################################


      # eleminate overlapping labels
      # did the model detect any labels on this frame?  
      if len(labels) > 0:
          # setup a condition as False
          #       
          condition = False
          while condition == False:

              # final_label_list will contain all non overlapping detection-boxes objects for this iteration
              # it will be used as initial labels list for further iterations
              final_label_list = []

              #check for each label:
              for label in labels:
                  #get xmin and xmax of label in order to compare with x-values of other labels
                  label_round_xmin_checker = label[3]
                  label_round_xmax_checker = label[5]

                  # labels_checker list will contain all overlapping labels for the specific label of this iteration
                  labels_checker = []

                  # new_label_list will contain the label with the highest score of labels_checker
                  new_label_list = []

                  # check whether label is overlapping with others or not
                  for label_check in labels:
                      # skip label_check with chosen label itself
                      if label_round_xmin_checker == label_check[3] and label_round_xmax_checker == label_check[5]:
                          break
                      else:  
                          # if a xmin or xmax value of the given lies within the xmin-xmax range of another label, these labels are overlapping each other
                          # add overlapping label for further comparison to labels_ckecker list
                          # checking range is also slightly expanded by 5% of the image width in each direction
                          # this can also match very close detection-boxes which are nearly overlapping  
                          if label_check[3] - 0.05 < label_round_xmin_checker < label_check[5] + 0.05 or label_check[3] - 0.05 < label_round_xmax_checker < label_check[5] + 0.05:    
                              labels_checker.append(label_check)
                          # label is not overlapping with any of the labels
                          else:
                              labels_checker = labels_checker

                  # if the labels_ckecker list contains any candidates:
                  # -> overlappings were detected
                  # -> overlappings have to be resolver  
                  if len(labels_checker) > 0:

                        # add the actual label with which the overlap was detected to the checker list
                        labels_checker.append(label)

                        # compare score values for each label in labels_checker list
                        for label_comparison in labels_checker:

                            # if given label has the highest score of all labels in labels_checker list
                            # add label to new_label_list (winner of score value comparison)
                            if label_comparison[6] == max([sublist[6] for sublist in labels_checker]):
                                new_label_list = label_comparison
                            else:
                                print(str(label_comparison[6])+"was not the maximun score -> the label got deleted")
                                
                        
                  # if the labels_ckecker ist empty:
                  # -> no overlappings were detected
                  # -> label can be added right away to final_label_list
                  else:        
                      final_label_list.append(label)

                  # add new_label list to final_label_list
                  # only if final_label_list is not already containing the label from a previous iteration
                  if len(new_label_list) > 0 and new_label_list not in final_label_list:
                      final_label_list.append(new_label_list)


              # final_label_list is the initial input labels list for next while iteration
              # if: length of initial labels list equals length of output final_label_list 
              # -> no changes were made, meaning initial list is not containing any more overlapping labels
              # -> change condition to True and exit while loop
              # -> update the labels list with the filtered labels from the third filter criteria
              #    the labels list now contains all detection-boxes which met filter criteria 1,2 and 3
              if len(labels) == len(final_label_list):
                  condition = True
                  labels = final_label_list
              # else: length is not euqal
              # -> changes were made during this iteration
              # -> condition stays False and the next iteraion begins  
              else:
                  condition = False
                  labels = final_label_list 
      ############################################## END OF: POST FILTER 2> ################################################
    
      #<---------------------------------------------- END OF: POST FILTERING   ------------------------------------------>
        
        
        
        
        
        
      
      #<---------------------------------------------- START OF: OBJECT TRACKING   --------------------------------------->
    
      ########################################## START OF: ENTITY ASSIGNMENT    ###########################################
        
      # here the valid detection-boxes as the output of the post filtering are used for the Object Tracker  
      # the assignment of valid detection-boxes to entities is based on 4 different cases:
      # In order to track objects, entities are created and detection-boxes are matched with these entities over frames
    
      # !!!!!!!! keep in mind: the term "label" is used here for a valid detection-box of the labels list !!!!!!!!  
        
      if len(labels) > 0:                                        # did the model detect any labels on this frame?
            
          if is_active.count(True) > 0:                          # is there at least one active entity
                                                                 # if there is no active entity -> case 1 is used
                                                                 # -> in this case a new entity is created for the detection box
                                                                 # (see case 1 at the bottom)
                                                                 # case 1 is always the case for the first frame   
                        
          ## Entity Assignment Case 2: ##
          # detection-box/label can be assigned to an active visible entity
          # # check if label can be assigned to an hidden entity
              
              # get the nearest entity for current label
              nearest_entity = []                                  # set up a nearest_entity list 
              for label in range(len(labels)):                     # iterate over labels  
                    distance = []                                  # clear distance after each label
                    visible_id = []
                    for visible in visible_entities:               # iterate over visible entites     
                        if last_time_modified[visible] != frame_counter: # avoid assiging multiple labels to one entity
                                                                         # -> only proceed if the entity wasn't updated in this frame yet
                            dis = abs(labels[label][1][0]-entity_list[visible][0][0])
                            distance.append(dis)
                            visible_id.append(visible)             # calculate the distance for a given label to each currently active visible entity 
                                                                   # append these distances to the distance list
                    # if there are distances:            
                    # get the index of the min value of distance and apply it as index to visible_id to get the ID of the nearest entity
                    # the entity with the smallest entity to a given label is a nearest_entity
                    if len(distance) > 0:
                        nearest_entity.append(visible_id[distance.index(min(distance))])
                        #nearest_entity = [ID , ID , ID , ID]
                        
                        
                        # check if the detection-box/label was already assigned to another entity
                        # and build a tolerance area (+/- 8% of image width/height around the center of nearest entities)
                        if labels[label][0] == False and entity_list[nearest_entity[label]][0][0] -0.08 < labels[label][1][0] < entity_list[nearest_entity[label]][0][0] + 0.08 and entity_list[nearest_entity[label]][0][1] -0.08 < labels[label][1][1] < entity_list[nearest_entity[label]][0][1] + 0.08: 
                        
                            # if the center of the label lies within a tolerance area of a visible entity:
                            # -> update existing entity
                            # -> update last position of entity with the center of the matching detection-box
                            entity_list[nearest_entity[label]].appendleft(labels[label][1]+frame_counter_tuple)  
                            # -> mark that this detection-box has been assigned to an entity 
                            labels[label][0] = True
                            # -> update last modified frame number to the current frame number
                            last_time_modified[nearest_entity[label]] = frame_counter 
                       
          ## Entity Assignment Case 3: ##
          # detection-box/label can be assigned to an active hidden entity
          # check if label can be assigned to an hidden entity  
        
                    if labels[label][0] == False:                   # check if the detection-box/label was already assigned
                        for hidden in hidden_entities:              # iterate over hidden entites
                            if last_time_modified[hidden] != frame_counter: # avoid assiging multiple labels to one entity
                                
                                if labels[label][0] == False:


                                    if movement == "right":                                  
                                        # build tolerance area (+/- 10% of image width/height around the center of nearest entities)
                                        # a larger tolerance area is used for hidden entities
                                        # this is because for hidden entities calculated expected_next center is used (see below at UPDATE)
                                        if expected_next_center[hidden]-0.1 < labels[label][1][0] < expected_next_center[hidden]+0.1 and entity_list[hidden][0][1]-0.1 < labels[label][1][1] < entity_list[hidden][0][1]+0.1:          # check for X and Y-position condition                
                                            
                                            # if the center of the label lies within a tolerance area of a hidden entity:
                                            # -> update existing entity
                                            # -> update expected position of hidden entity with the center of the matching detection-box/label
                                            entity_list[hidden].appendleft(labels[label][1]+frame_counter_tuple)  
                                            
                                            # -> mark that this detection-box has been assigned to an entity
                                            labels[label][0] = True
                                            # -> update last modified frame number to the current frame number
                                            last_time_modified[hidden] = frame_counter 
                                            
                                    # the left case is for this implementation the same as the right case
                                    # if needed it can be differentiated between the movement direction
                                    # e.g. a larger tolerance area in the direction of movement can be used
                                    elif movement == "left":
                                        if expected_next_center[hidden]-0.1 < labels[label][1][0] < expected_next_center[hidden]+0.1 and entity_list[hidden][0][1]-0.1 < labels[label][1][1] < entity_list[hidden][0][1]+0.01:  
                                  
                                            entity_list[hidden].appendleft(labels[label][1]+frame_counter_tuple)  
                                             
                                            labels[label][0] = True
                                            last_time_modified[hidden] = frame_counter 

          ## Entity Assignment Case 4: ##
          # detection-box/label can not be assigned to an active visible entity or an active hidden entity
          # check if a new entitiy must be created
          # differentiates between left and right movement direction
            
                        if labels[label][0] == False:
                            ## movement = left ##
                            # the left case is basically the same but the check condition: exit_zone_left
                            if movement == "left":
                                
                                if labels[label][1][0] < exit_zone_left:    # xmin of detection-box < exit zone  !!!! specific for right direction !!!!  
                                                                            # dont open new entities in exit zone
                                        
                                    # additionally check if there are any visible or hidden entities near the label/d-box
                                    # there might be an entity near it which has been updated
                                    # in order to avoid the creation of a new entity near an existing one this check is done
                                    distance2 = []                                  # get distance 2
                                    visible_id2 = []                                # distance 2 = distances visible entities
                                    dis0 = 10
                                    distance2.append(dis0)                                     
                                    for visible in visible_entities:                # iterate over all visible entites                                                                  
                                        dis2 = abs(labels[label][1][0]-entity_list[visible][0][0])
                                        distance2.append(dis2)
                                        visible_id2.append(visible)                 
                                    
                                    distance3 = []                                  # get distance 3
                                    visible_id3 = []                                # distance 3 = distances hidden entities 
                                    distance3.append(dis0)                          
                                    for hidden in hidden_entities:                  # iterate over all hidden entities                      
                                        dis3 = abs(labels[label][1][0]-entity_list[hidden][0][0])
                                        distance3.append(dis3)
                                        visible_id3.append(hidden)
                                
                                
                                    # Open a new entity for the current detection box unless a visible or
                                    # hidden entity already lies inside the tolerance area
                                    # (+/- 10% of the image width).
                                    # The length check is evaluated first, so min() is only reached when
                                    # at least one distance list has more than one entry.
                                    if (len(distance2) > 1 or len(distance3) > 1) and (-0.1 < min(distance2) < 0.1 or -0.1 < min(distance3) < 0.1):
                                        print("nearby entity -> it is not allowed ")

                                    # -> enough distance to existing entities, or no distances available
                                    # -> create new entity
                                    else:
                                        entity = deque(maxlen = buffer)                          # new deque object for the new entity
                                        entity.appendleft(labels[label][1]+frame_counter_tuple)  # center of the current label + frame number
                                        entity_list.append(entity)

                                        labels[label][0] = True                                  # D-Box has been assigned

                                        # Append the bookkeeping values directly.
                                        # Looking the new entity up with entity_list.index(entity_list[-1])
                                        # compares deques by value and returns the FIRST equal entity,
                                        # which is not necessarily the one that was just appended.
                                        last_time_modified.append(frame_counter)                 # entity was last modified on this frame
                                        is_active.append(True)                                   # new entities start active
                                        is_entity_counted.append(False)                          # not counted yet, since it's new
                                        expected_next_center.append(labels[label][1][0])         # entry for the expected next center
                                        current_speed.append(0)                                  # entry for the current speed
                            
                            # movement = right #
                            else: 
                               
                                if labels[label][1][0] < exit_zone_right:      # x-position of the detection box is before the exit zone
                                                                               # -> do not open new entities in the exit zone

                                    # Additionally check whether a visible or hidden entity is near the
                                    # label/d-box. An entity next to it may already have been updated;
                                    # this check avoids creating a second entity beside an existing one.
                                    dis0 = 10                                       # sentinel distance, far outside the tolerance area

                                    distance2 = [dis0]                              # distance 2 = distances to visible entities
                                    visible_id2 = []
                                    for visible in visible_entities:
                                        dis2 = abs(labels[label][1][0]-entity_list[visible][0][0])
                                        distance2.append(dis2)
                                        visible_id2.append(visible)

                                    distance3 = [dis0]                              # distance 3 = distances to hidden entities
                                    visible_id3 = []
                                    for hidden in hidden_entities:
                                        dis3 = abs(labels[label][1][0]-entity_list[hidden][0][0])
                                        distance3.append(dis3)
                                        visible_id3.append(hidden)

                                    # A nearby entity inside the tolerance area (+/- 10% of the image width)
                                    # blocks the creation of a new entity.
                                    if (len(distance2) > 1 or len(distance3) > 1) and (-0.1 < min(distance2) < 0.1 or -0.1 < min(distance3) < 0.1):
                                        print("nearby entity -> it is not allowed ")

                                    # -> enough distance to existing entities, or no other entities at all
                                    # -> create new entity
                                    else:
                                        entity = deque(maxlen = buffer)                          # new deque object for the new entity
                                        entity.appendleft(labels[label][1]+frame_counter_tuple)  # center of the current label + frame number
                                        entity_list.append(entity)

                                        labels[label][0] = True                                  # D-Box has been assigned

                                        # Append the bookkeeping values directly.
                                        # Looking the new entity up with entity_list.index(entity_list[-1])
                                        # compares deques by value and returns the FIRST equal entity,
                                        # which is not necessarily the one that was just appended.
                                        last_time_modified.append(frame_counter)                 # entity was last modified on this frame
                                        is_active.append(True)                                   # new entities start active
                                        is_entity_counted.append(False)                          # not counted yet, since it's new
                                        expected_next_center.append(labels[label][1][0])         # entry for the expected next center
                                        current_speed.append(0)                                  # entry for the current speed
                                        
   
                                    
          ## Entity Assignment Case 1: ##
          # no active entities -> Detection-Box of label list must belong to a new entity
          # -> create a new entity for each   
          else:                                                        
              for label, d_box in enumerate(labels):         # for each D-Box in the label list:
                  if d_box[0] == False:                      # only D-Boxes that have not been assigned yet
                      # create a new entity and the corresponding list entries
                      entity = deque(maxlen=buffer)
                      entity.appendleft(d_box[1]+frame_counter_tuple)   # center of the D-Box + frame number
                      entity_list.append(entity)

                      d_box[0] = True                        # D-Box has been assigned

                      # Append the bookkeeping values directly.
                      # Looking the new entity up with entity_list.index(entity_list[-1])
                      # compares deques by value and returns the FIRST equal entity,
                      # which is not necessarily the one that was just appended.
                      last_time_modified.append(frame_counter)     # entity was last modified on this frame
                      is_active.append(True)                       # new entities start active
                      is_entity_counted.append(False)              # not counted yet, since it's new
                      expected_next_center.append(d_box[1][0])     # entry for the expected next center
                      current_speed.append(0)                      # entry for the current speed
                        
                                      
 ########################################## END OF: ENTITY ASSIGNMENT    #################################################
 
 ################################# Start of: UPDATING ENTITIES AFTER ASSIGNMENT PROCESS    ###############################

 # these updating steps are used in order to prepare all entities for the assignment process for the next frame
 # during this update process, the following is done:
 # -> check if entites can be closed (deactivating entities)
 # -> based on this prepare a list of all active entities for the next frame
 # -> update hidden and visible status of entities
 # -> update movement speed of entities
 # -> update the current average speed based on the updated movement speeds of entities
 # -> based on movement speed: calculate expected next positions for entities for the next frame   

 # !!!! since the code for the right and left movement directions is basically the same, only the right version is described !!!!!
 # !!!! please scroll down to #### RIGHT MOVEMENT ####
    
    
    
    
 #### LEFT MOVEMENT #####    
 #different behaviour for movement directions:
      
      if movement == "left":
          #### is_active ####
          # Decide for every active entity whether it has to be deactivated.
          for active in active_entities:
              if last_time_modified[active] + 3 < frame_counter:            # entity not updated for more than 3 frames
                  # The entity has left the image when either its last recorded position
                  # or its expected position lies in the exit zone (x < 0.05).
                  if entity_list[active][0][0] < 0.05 or expected_next_center[active] < 0.05:

                      # The distance to the next entity is taken once, when deactivating,
                      # and only for entities which have successfully been counted.
                      # It is used later by the MISSING OBJECT IDENTIFIER.
                      if is_entity_counted[active]:

                          # x_pos: center positions of all other counted entities
                          # (expected position for hidden ones, last recorded position for visible ones)
                          x_pos = [expected_next_center[other] for other in hidden_entities
                                   if active != other and is_entity_counted[other]]
                          x_pos += [entity_list[other][0][0] for other in visible_entities
                                    if active != other and is_entity_counted[other]]

                          if len(x_pos) > 0:
                              # ascending order for left movement: x_pos[0] is the smallest x,
                              # i.e. the entity closest to the left exit zone
                              x_pos.sort(reverse=False)

                              # hidden leaving entity  -> distance from its expected position
                              # visible leaving entity -> distance from its last recorded position
                              if active in hidden_entities:
                                  entity_distance.append((active,abs(expected_next_center[active] - x_pos[0])))
                              if active in visible_entities:
                                  entity_distance.append((active,abs(entity_list[active][0][0] - x_pos[0])))

                          # no other counted entity found: use a max range value instead
                          # for left movement -> abs( position d-box - 90% of image width )
                          # (the previous value 0.1 is the right-movement constant and gave a
                          #  near-zero distance for an entity leaving on the left)
                          else:
                              entity_distance.append((active,abs(entity_list[active][0][0] - 0.9)))

                      # checkup screenshot in case the winemaker wants to verify a missing vine
                      # file name = video_name + frame number + MSEC position in the video
                      cv2.imwrite('vine_screenshots/'+video_name+"_"+str(frame_counter)+"_"+str(cap.get(cv2.CAP_PROP_POS_MSEC))+"_vine.jpg",image_np)

                      # deactivate the entity afterwards
                      is_active[active] = False

                      # remember the MSEC position together with the entity number
                      # entity_MSEC = [(ID, MSEC), (ID, MSEC)]
                      entity_MSEC.append((active,cap.get(cv2.CAP_PROP_POS_MSEC)))

                  # entity could not be updated for a long time (more than 90 frames)
                  # -> deactivate the entity
                  # -> if it was counted => reverse this count
                  elif last_time_modified[active] + 90 < frame_counter:
                      is_active[active] = False
                      if is_entity_counted[active] == True:
                          vine_counter = vine_counter-1

              # NOTE(review): this branch looks unreachable - whenever the condition holds,
              # the `+ 3` condition above holds as well. Kept unchanged; confirm the intent.
              elif last_time_modified[active] + 60 < frame_counter:         # eliminate entities getting lost outside the exit box
                  is_active[active] = False



        ## active_entities ##
        # this is used to update the list of active entities for the next frame
        # get the indices for all active entities
        # update the list of all currently active entities (active_entities)
          # Rebuild the index list of all entities that are still flagged as active.
          active_entities.clear()
          active_entities = [idx for idx, flag in enumerate(is_active) if flag == True]

          ## visible entities and hidden entities ##
          # Split the active entities for the next frame by comparing the frame of the
          # last modification with the current frame number:
          #   last_time_modified + 3 >  frame_counter -> visible
          #   otherwise                               -> hidden
          visible_entities = []
          hidden_entities = []
          for active in active_entities:
              target = visible_entities if last_time_modified[active]+3 > frame_counter else hidden_entities
              target.append(active)

        ## current_speed ##
        # calculate the moving speed for each active entitiy
        # a speed can only be calculated if the entity has at least 2 entries of known positions on frames
        # 
          # Calculate the moving speed of every active entity.
          # Each entity is a deque filled with appendleft, so index k is newer than k+1.
          for active in active_entities:
              # a speed can only be calculated from at least 2 known positions
              if len(entity_list[active]) >= 2 :
                  diff = []

                  for k in range(len(entity_list[active])-1):
                      # difference of consecutive X-positions of the entity center (newer - older)
                      step = entity_list[active][k][0] - entity_list[active][k+1][0]

                      # For a LEFT movement x decreases over time, so a valid step is <= 0.
                      # (The previous test `>= 0` was the right-movement condition; it discarded
                      #  every genuine left step and accepted only wrong-direction jitter.)
                      if step <= 0 :
                          # normalize by the frame gap between the two centers,
                          # since there may be frames without a detection in between
                          diff.append(step/(entity_list[active][k][2]-entity_list[active][k+1][2]))
                      else:
                          # step points against the movement direction -> use the avg_speed instead
                          diff.append(avg_speed)

                  # the current speed is the average over all consecutive center pairs
                  current_speed[active] = sum(diff) / (len(entity_list[active])-1)

                  # dampening the speed if it makes too high of a spike
                  # this can be used to dampen the speed if needed

                  #if current_speed[active]*2.5 >= avg_speed:
                  #      current_speed[active] = current_speed[active]*0.8
                  #      #current_speed[active] = avg_speed
                  #else:
                  #      current_speed[active] = current_speed[active]

              # fewer than 2 entries: use the avg_speed over all entities instead
              else:
                  current_speed[active] = avg_speed
        
        ## expected_next_center ##                             
        # calculate the expected position for each active entities for the next frame
        # check if the entity got an update on the current frame
        # if it got updated: use the current center position of the assigned detection box and move it by the speed of the entity
          # Predict the center position of every active entity for the next frame.
          for active in active_entities:
              modified_at = last_time_modified[active]
              if modified_at == frame_counter:
                  # updated on the current frame: start from the center recorded for this frame
                  start_x = entity_list[active][0][0]
              elif modified_at < frame_counter:
                  # not updated on the current frame: continue from the last expected position
                  start_x = expected_next_center[active]
              else:
                  continue
              # move the start position by the speed of the entity
              expected_next_center[active] = start_x + current_speed[active]
      
    
    ## average entity speed ##
    # calculate the average entity speed
    # use the values of the movement speed of all currently acitve entities
    # calculate the average over them
    
    # this is needed for the calculation of the expected next position of a hidden entitie:
    # if it was only seen once after it turned into the hidden status there is no possibility to calculate the speed of it
    # therefore the avg_speed is needed 
      # Collect the current speed of every active entity and average them.
      # avg_speed keeps its previous value when there is no active entity.
      speeds = []
      for active in active_entities:
          speeds += [current_speed[active]]
      if speeds:
          avg_speed = sum(speeds) / len(speeds)
        
      
 #### RIGHT MOVEMENT #####        
      if movement == "right":
          #### is_active ####
          # Decide for every active entity whether it has to be deactivated.
          for active in active_entities:
              if last_time_modified[active] + 3 < frame_counter:            # entity not updated for more than 3 frames
                  # The entity has left the image when either its last recorded position
                  # or its expected position lies in the exit zone (x > 0.95).
                  if entity_list[active][0][0] > 0.95 or expected_next_center[active] > 0.95:

                      # The distance to the next entity is taken once, when deactivating,
                      # and only for entities which have successfully been counted.
                      # It is used later by the MISSING OBJECT IDENTIFIER.
                      if is_entity_counted[active]:

                          # x_pos: center positions of all other counted entities
                          # (expected position for hidden ones, last recorded position for visible ones)
                          x_pos = [expected_next_center[other] for other in hidden_entities
                                   if active != other and is_entity_counted[other]]
                          x_pos += [entity_list[other][0][0] for other in visible_entities
                                    if active != other and is_entity_counted[other]]

                          if len(x_pos) > 0:
                              # descending order for right movement: x_pos[0] is the largest x,
                              # i.e. the entity closest to the right exit zone
                              x_pos.sort(reverse=True)

                              # hidden leaving entity  -> distance from its expected position
                              # visible leaving entity -> distance from its last recorded position
                              if active in hidden_entities:
                                  entity_distance.append((active,abs(expected_next_center[active] - x_pos[0])))
                              if active in visible_entities:
                                  entity_distance.append((active,abs(entity_list[active][0][0] - x_pos[0])))

                          # no other counted entity found: use a max range value instead
                          # for right movement -> abs( position d-box - 10% of image width )
                          else:
                              entity_distance.append((active,abs(entity_list[active][0][0] - 0.1)))

                      # checkup screenshot in case the winemaker wants to verify a missing vine
                      # file name = video_name + frame number + MSEC position in the video
                      cv2.imwrite('vine_screenshots/'+video_name+"_"+str(frame_counter)+"_"+str(cap.get(cv2.CAP_PROP_POS_MSEC))+"_vine.jpg",image_np)

                      # deactivate the entity afterwards
                      is_active[active] = False

                      # remember the MSEC position together with the entity number
                      # entity_MSEC = [(ID, MSEC), (ID, MSEC)]
                      entity_MSEC.append((active,cap.get(cv2.CAP_PROP_POS_MSEC)))

                  # entity could not be updated for a long time (more than 90 frames)
                  # -> deactivate the entity
                  # -> if it was counted => reverse this count
                  elif last_time_modified[active] + 90 < frame_counter:
                      is_active[active] = False
                      if is_entity_counted[active] == True:
                          vine_counter = vine_counter-1

              # NOTE(review): this branch looks unreachable - whenever the condition holds,
              # the `+ 3` condition above holds as well. Kept unchanged; confirm the intent.
              elif last_time_modified[active] + 60 < frame_counter:         # eliminate entities getting lost outside the exit box
                  is_active[active] = False



        ## active_entities ##
        # this is used to update the list of active entities for the next frame
        # get the indices for all active entities
        # update the list of all currently active entities (active_entities)
          # Rebuild the index list of all entities that are still flagged as active.
          active_entities.clear()
          active_entities = [idx for idx, flag in enumerate(is_active) if flag == True]

          ## visible entities and hidden entities ##
          # Split the active entities for the next frame by comparing the frame of the
          # last modification with the current frame number:
          #   last_time_modified + 3 >  frame_counter -> visible
          #   otherwise                               -> hidden
          visible_entities = []
          hidden_entities = []
          for active in active_entities:
              target = visible_entities if last_time_modified[active]+3 > frame_counter else hidden_entities
              target.append(active)

        ## current_speed ##
        # calculate the moving speed for each active entitiy
        # a speed can only be calculated if the entity has at least 2 entries of known positions on frames
        # 
          for active in active_entities:
        # check if the entity has 2 entries:
        # if yes: calculate the speed
              if len(entity_list[active]) >= 2 :
                  diff = []
                    
                  for k in range(len(entity_list[active])-1):
                      diff.append(0)
                      # calculate the difference of consecutive X-Positions of the center of an entity
                      # if this difference for a center pair is negative for a right movement -> use the avg_speed instead
                      # this ensures that the speed is always aligned to the movement direction
                      # since vines are static it should not be possible that they have a negative movement speed
                      # therefore this condition is used
                      if (entity_list[active][k][0] - entity_list[active][k+1][0]) >= 0 :                
                          # the difference is normalized since there might frame gaps between center pairs
                          # therefore the difference is divided by the difference between the framenumbers of the center pairs
                          #  normalized difference = x_value_center[k] - x_value_center[k+1] / framenumber[k] - framenumber[k+1]
                          diff[k] = (entity_list[active][k][0] - entity_list[active][k+1][0])/(entity_list[active][k][2]-entity_list[active][k+1][2])
                      else:
                          diff[k] = avg_speed
                  # the current speed is the average of all distances between the consecutive X-Positions of the center of an entity         
                  current_speed[active] = sum(diff) / (len(entity_list[active])-1)
                    
                  # dampening the speed if it makes too high of a spike
                  # this can be used to dampen the speed if needed
                    
                  #if current_speed[active]*2.5 >= avg_speed:
                  #      current_speed[active] = current_speed[active]*0.8
                  #      #current_speed[active] = avg_speed
                  #else:
                  #      current_speed[active] = current_speed[active]
                        

          # if no: use the avg_speed over all entities instead until it has at least 2 entries
          # see avg_speed below
              else:
                  current_speed[active] = avg_speed #durch avg ersetzen
        
        ## expected_next_center ##                             
        # calculate the expected position for each active entities for the next frame
        # check if the entity got an update on the current frame
        # if it got updated: use the current center position of the assigned detection box and move it by the speed of the entity
          for active in active_entities:    
               if last_time_modified[active] == frame_counter: 
                   expected_next_center[active] = entity_list[active][0][0] + current_speed[active]
        # if it did not get an update: use the last expected position of the entity and move it by the speed of the entity
               elif last_time_modified[active] < frame_counter: 
                    expected_next_center[active] = expected_next_center[active] + current_speed[active]
      
    
    ## average entity speed ##
    # calculate the average entity speed
    # use the values of the movement speed of all currently acitve entities
    # calculate the average over them
    
    # this is needed for the calculation of the expected next position of a hidden entitie:
    # if it was only seen once after it turned into the hidden status there is no possibility to calculate the speed of it
    # therefore the avg_speed is needed 
      speeds = []
      for active in active_entities:
            #get the current speed
            speeds.append(current_speed[active])
      if len(active_entities) > 0:
          avg_speed = (sum(speeds)/(len(speeds)))
  
    
    ## movement update ##
      # can be used to adapt to a changing movement direction within a video
      # not needed for this given prototype
    
      #if avg_speed < 0:
      #    movement = "left"
      #elif avg_speed > 0:
      #    movement = "right"
      #else:
      #    movement = movement
    
  ################################# END OF: UPDATING ENTITIES AFTER ASIIGNMENT PROCESS   ##################################
    
  #<---------------------------------------------- END OF: OBJECT TRACKING   --------------------------------------------> 







  #<---------------------------------------------- START OF: OBJECT COUNTER   -------------------------------------------->

    
    
      ## vine counter ##
      # increase vine_counter (only if 10+ labels belong to one entity)
      # a vine is only counted if the entity got appended a certain amount of matching objects
      # only counted if above a certain count_threshold. this threshold was setup in the beginning (currently 10)
      for active in active_entities:
        if is_entity_counted[active] == False and len(entity_list[active]) > count_threshold :
            is_entity_counted[active] = True
            vine_counter += 1
                              
  #<---------------------------------------------- END OF: OBJECT COUNTER   ---------------------------------------------->






  #<----------------------------------------------- START OF: VISUALIZATION   -------------------------------------------->
    
    # This section is used in order to visualize the actual object detection, object tracking and counting.
    # It is not needed for the functionallity of the system and is only used to visualize the algorithm. 
    # -> Visualization of object detection, object tracking and object counting
    #
    # A window will pop up which will show the application for the chosen video.
    # In order to exit the application press 'q' while it's running.
    
      # get visualizations for the detection-boxes
      vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
      
      # get visualizations for the active entities
      # it is only needed to visualize active ones since they are the only one that are currently on the screen
      for active in active_entities:
                                              
          for i in range(1, len(entity_list[active])):
                  # check if the entity has at least two entries in the deque list (two known positions)
                  # if not, no line can be drawn
                  if entity_list[active][i - 1] is None or entity_list[active][i] is None:
                      continue
                  
                  # draw a line the line of the last known positions of the entity
                  # this draws an line between the last known positions
                  # *w and *h is used to convert the relative positions into actual pixel positions which can be drawn
                  thickness = 5  
                  x1 = round(entity_list[active][i - 1][0] * w)
                  y1 = round(entity_list[active][i - 1][1] * h)
                  x2 = round(entity_list[active][i][0] * w)
                  y2 = round(entity_list[active][i][1] * h)
                  cv2.line(image_np, (x1,y1), (x2,y2), (255,255,0), thickness)
                  
          # draw a dot for the expected next center
          x3 = round(expected_next_center[active] * w)
          y3 = round(entity_list[active][0][1] * h)
          cv2.circle(image_np, (x3,y3) , 13, (255 , 0, 0), thickness=-1, lineType=8, shift = 0)
          # cv2.circle(img, center, radius, color, thickness=1, lineType=8, shift=0)
          
          # draw the id of the entity into the corresponding expected center of it
          # the -9 and +9 is used the align it somewhere in the actual center of the dot
          font = cv2.FONT_HERSHEY_SIMPLEX
          cv2.putText(image_np,str(active),(x3-9,y3+9), font, 0.6,(255,255,255),2,cv2.LINE_AA)
                  
          # draw the current vine counter in the upper left corner (50,50) of the image
          font = cv2.FONT_HERSHEY_SIMPLEX
          cv2.putText(image_np,('counted vines: ' + str(vine_counter)),(50,50), font, 1,(255, 0, 0),2,cv2.LINE_AA)  

      # show the window with the vizualizations
      # setup breakup of the  via pressing 'q'
      cv2.imshow('object detection window',image_np) 
      
      #cv2.resize(image_np, (1600,900))  -> this can be used if the window scale is too big for the screen
      if cv2.waitKey(1) & 0xFF == ord('q'):
               break
    
   #<----------------------------------------------- END OF: VISUALIZATION   -------------------------------------------->
    
    
    # release the video after it has been processed
    cap.release()
    cv2.destroyAllWindows() 

### Result of the Object Counter

In [None]:
# total number of vines that were counted while processing the video
print(vine_counter)

## MISSING OBJECT IDENTIFIER
* calculate mean distance between vines for the video
* based on this mean distance -> identify positions of missing vines

In [None]:
## get distances of detected objects (vines)
# every entry of entity_distance is indexed as (entity id, distance);
# only the distance part is needed for the statistics below
vines_distances = [entry[1] for entry in entity_distance]

if vines_distances:
    # calculate the std and mean distance for the video
    std_dist = np.std(vines_distances)
    mean_dist = sum(vines_distances) / len(vines_distances)

    # delete outlier distances in order to calculate a more accurate mean distance
    # -> a distance is an outlier if it deviates from the mean by more than
    #    1.5 times the standard deviation
    # "<=" (instead of "<") keeps the values when the std is 0, i.e. when there
    # is only one distance or all distances are equal; otherwise the cleaned
    # list would be empty and the mean below would divide by zero
    outlier_limit = 1.5 * float(std_dist)
    cleaned_vines_distances = [
        distance for distance in vines_distances
        if abs(float(distance) - float(mean_dist)) <= outlier_limit
    ]

    # cleaned versions of std and mean for the distances
    std_dist_cleaned = np.std(cleaned_vines_distances)
    mean_dist_cleaned = sum(cleaned_vines_distances) / len(cleaned_vines_distances)
else:
    # no distances were recorded -> no statistics and no missing vines
    cleaned_vines_distances = []
    std_dist = mean_dist = float('nan')
    std_dist_cleaned = mean_dist_cleaned = float('nan')

# print out the cleaned versions of std and mean for the distances
print("std_dist_cleaned"+str(std_dist_cleaned))
print("mean_dist_cleaned"+str(mean_dist_cleaned))

# set up list for missing vines
missing_vines_msec_locations = []

# if a distance to the next vine is larger than 1.8 times the cleaned mean
# distance -> a missing object (vine) is detected
missing_vine_limit = 1.8 * mean_dist_cleaned    # can easily be adjusted

for entry in entity_distance:
    entity_id = entry[0]
    if entry[1] > missing_vine_limit:
        # look up the video position(s) stored for this entity
        for msec_entry in entity_MSEC:
            if msec_entry[0] == entity_id:
                if movement=="right":
                    print("Missing Vine on the left of entity " + str(entity_id) + "at MSEC: "+ str(msec_entry[1]) + " in the Video " )
                else:
                    print("Missing Vine on the right of entity  " + str(entity_id) + "at MSEC: "+ str(msec_entry[1]) + " in the Video "  )
                # record the location for both movement directions; previously
                # it was only stored for movement == "right", so gaps found
                # while moving left were printed but never counted or mapped
                missing_vines_msec_locations.append(msec_entry)

# show all missing vines with their msec positions in the video
print(missing_vines_msec_locations)

### Result of the Missing Object Identifier

In [None]:
print(len(missing_vines_msec_locations))

# Output Processing
* combine vine counter and missing vine positions with the gps-data of the video
* visualize the result on an interactive map

In [None]:
from datetime import datetime
import os
import cv2
import pandas as pd
import numpy as np
from research.object_detection.utils import visualization_utils as vis_util
from PIL import Image
import folium
from matplotlib import pyplot as plt

In [None]:
# display the name of the video whose GPS data is processed below
video_name

### Read the gps-information of  the csv file
* only extract the timestamp, latitude and longitude
* rename these fields to date, lat and lon
* adjust the format of the timestamp field (this is set up for the specific CSV files used here)
* for csv files in another format, this section has to be changed
* -> it is set up to work for the csv files of the used sample videos

In [None]:
from datetime import datetime

# read the GPS export that belongs to the video and keep only the
# timestamp and the position columns (renamed to lat / lon)
dataframe_gps = (
    pd.read_csv(video_name+'_Hero7 Black-GPS5.csv', sep=',')
    [['date', 'GPS (Lat.) [deg]', 'GPS (Long.) [deg]']]
    .rename(columns={"GPS (Lat.) [deg]": "lat", "GPS (Long.) [deg]": "lon"})
)

# cut the timestamp string down to the first 12 of its last 13 characters
# and parse the result as a datetime
gps_time_text = dataframe_gps['date'].str[-13:].str[:12]
dataframe_gps['date'] = pd.to_datetime(gps_time_text)

dataframe_gps.head(10)
#dataframe_gps.info()

### Convert date formats and Calculate relative timestamp in the video
* convert the date format to datetime
* calculate the relative timestamp (diff) for each GPS-timestamp in the video
* in order to do this: take each timestamp and subtract the min timestamp for each

In [None]:
# make sure the timestamps are stored as datetime values
# (one conversion is enough; it is a no-op if the column is already datetime)
dataframe_gps['date'] = pd.to_datetime(dataframe_gps['date'])

# show the latest GPS timestamp of the recording
# Series.max() runs vectorised and skips missing timestamps (NaT)
dataframe_gps['date'].max()
#dataframe_gps.head(10)

In [None]:
# relative timestamp: offset of every GPS timestamp from the earliest one
earliest_timestamp = min(dataframe_gps['date'])
datediff = dataframe_gps['date'] - earliest_timestamp
dataframe_gps['diff'] = datediff
dataframe_gps.head(10)

### Calculate the millisecond for each relative timestamp in the video
* calculate the millisecond position (MSEC) in the video for each GPS-information
* this MSEC is needed in order to assign identified missing objects (missing vines) to actual gps-positions of the video

In [None]:
# convert every relative timestamp (a timedelta) into its millisecond
# position in the video
# this is done on the whole column at once instead of looping over the rows
MSEC_all = (dataframe_gps['diff'].dt.total_seconds() * 1000).values

dataframe_gps['MSEC'] = MSEC_all
dataframe_gps.head(10)

### Create final dataframe for the map creation
* consisting of: MSEC, lat, lon

In [None]:
# keep only the columns needed for the map
# .copy() makes the selection an independent dataframe, so assigning to its
# columns later does not raise a SettingWithCopyWarning
dataframe_gps = dataframe_gps[['MSEC', 'lat', 'lon']].copy()
dataframe_gps.head(10)

### Create dataframe for all identified missing objects (missing vines)
* create a dataframe also consisting of: MSEC, lat, lon
* compare the millisecond positions of the missing vines with the millisecond positions of available gps-timestamps of the video
* assign a missing vine to the gps-position of the video with the smallest difference between both millisecond positions
* min ( abs (MSEC of missing vine - MSEC of a gps-position of the video))
* add matched missing vine gps-locations to the dataframe missing_v

In [None]:
# note: this bare expression has no visible effect, only the last expression of a cell is displayed
missing_vines_msec_locations
# number of identified missing vines ("Fehlstellen" is German for gaps / missing spots)
fehlstellen_counter = len(missing_vines_msec_locations)
# display the number of missing vines
fehlstellen_counter

In [None]:
# match every missing vine to the GPS entry whose millisecond position in the
# video is closest to the millisecond position of the missing vine:
# min ( abs (MSEC of a gps-position - MSEC of missing vine))
map_columns = ['MSEC', 'lat', 'lon']
gps_msec = dataframe_gps['MSEC'].values
gps_rows = dataframe_gps[map_columns].values

# collect the matched rows first and build the dataframe once at the end;
# DataFrame.append copies the whole frame on every call and no longer exists
# in pandas 2.0
matched_rows = []
for missing_entry in missing_vines_msec_locations:
    # missing_entry[1] is the MSEC position of the missing vine
    # argmin works on row positions, so it does not depend on the index labels
    nearest_row = np.abs(gps_msec - missing_entry[1]).argmin()
    matched_rows.append(gps_rows[nearest_row])

missing_v = pd.DataFrame(matched_rows, columns=map_columns)

missing_v

## Set up map creation
* create the base map with folium
* add the line for the vineyard to the map
* add the points for missing vines to the map 

In [None]:
# make sure the gps-data of the video is saved as float format
dataframe_gps['lat'] = dataframe_gps['lat'].astype(float)
dataframe_gps['lon'] = dataframe_gps['lon'].astype(float)


# calculate max value for dynamic map scaling
# this is used in order to get an adjusted initial view for the created map
min_lat = float(dataframe_gps['lat'].min())
min_lon = float(dataframe_gps['lon'].min())

# make sure the gps-data of missing vines is saved as float format
missing_v['lat'] = missing_v['lat'].astype(float)
missing_v['lon'] = missing_v['lon'].astype(float)

# create the acutal map visual with folium
# use the calculated min and max values for lat and lon as an entry point for the map
map_visual = folium.Map(location=[min_lat, min_lon], zoom_start=35, )


# add the vineyard to the map
# draws every available gps-position of the video as a blue point on the map
# -> resulting in a line which represents a vineyard
# -> for each point on this line add a popup functionallity is added which shows the following by clicking on it:
# -> "There are X vines an Y missing vines in this row / Z% missing vines for this row"
for index, row in dataframe_gps.iterrows():
        folium.CircleMarker([row['lat'], row['lon']],
                            radius=0.2,
                            popup="There are "+" "+str(vine_counter)+" vines and " +  str(fehlstellen_counter)+" missing vine(s) in this row /  "+ str(round(fehlstellen_counter/vine_counter, 2)*100)+"% missing vines for this row",
                            fill_color="#f53220",
                           ).add_to(map_visual)



# add the positions of the missing vines to the map
# draws every gps-position of a missing vine as red circle to a point on the map
# -> for each point a popup functionallity is added which shows the following by clicking on it:
# -> "missing vine number X at Latitude: (lat) and Longitude (lon)
count = 0
for index, row in missing_v.iterrows():
    print(row['lat'])
    print(row['lon'])
    count = count + 1
    folium.Circle([row['lat'], row['lon']],
                            radius=1.2,
                            popup="missing vine ("+ str(count)+") at Latitude: "+ str(row['lat'])+" and Longitude: "+str(row['lon']),
                            color="crimson",
                           ).add_to(map_visual)
    
print("map ready to be created")

## Create the finished map
* save the resulting output map
* saves the map in the "gps_output" directory which must exist in the directory this script is executed
* the output name of the map will depend on the name of the used video
* e.g. "video_9_GPS_output.html"

In [None]:
# write the finished map as html file named after the processed video
# the output directory is created first if it does not exist yet, so the
# save does not fail on a fresh checkout
output_dir = 'gps_output'
os.makedirs(output_dir, exist_ok=True)
map_visual.save(os.path.join(output_dir, video_name+'_GPS_output.html'))