In [None]:
!git clone https://github.com/david8862/keras-YOLOv3-model-set

In [None]:
%cd ./keras-YOLOv3-model-set

In [None]:
# Download the pretrained YOLOv3 and YOLOv3-tiny weight files from pjreddie.com into weights/,
# then run the repository's converter script on each cfg/weights pair to write a Keras .h5 file.
!wget -O weights/yolov3.weights https://pjreddie.com/media/files/yolov3.weights
!wget -O weights/yolov3-tiny.weights https://pjreddie.com/media/files/yolov3-tiny.weights
!python tools/model_converter/convert.py cfg/yolov3.cfg weights/yolov3.weights weights/yolov3.h5
!python tools/model_converter/convert.py cfg/yolov3-tiny.cfg weights/yolov3-tiny.weights weights/yolov3-tiny.h5

In [2]:
import cv2
import matplotlib.pyplot as plt
import random
import os
import csv
import xml.etree.ElementTree as ET
from PIL import Image
import copy
import time
import tensorflow as tf
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K
from tensorflow.keras.layers import ReLU, Activation, Multiply

In [None]:
def VOCtoCSV(dataset_dir, annotation_dir):
  """
  Convert the per-sequence XML annotations to a one-line-per-image text file.

  Every entry of ``dataset_dir`` whose name starts with ``sequence_`` is paired
  with ``annotation_dir/annotation_s<n>.xml`` (``<n>`` is the text after the
  first underscore). Each ``images/image`` element of that XML becomes one
  output line. Sequences are processed in sorted name order so that the class
  ids and the line order are the same on every run.

  Args:
  dataset_dir : str : directory path where dataset is present
  annotation_dir : str : directory path where annotations are present
  Returns:
  None
  Raises:
  FileNotFoundError : if the XML file of a sequence folder does not exist
  xml.etree.ElementTree.ParseError : if an XML file is malformed

  Outputs:
  annotations.txt : text file written to dataset_dir with following format:
                    dataset_dir/subfolder/image_filename x1_min,y1_min,x1_max,y1_max,class_id1 x2_min,y2_min,x2_max,y2_max,class_id2 ...
  classes.txt : List of all the classes in the annotations, one per line;
                the (0-based) line number is the class id
  """
  labels = []      # class names in first-seen order
  label_ids = {}   # class name -> class id (index into ``labels``)

  with open(os.path.join(dataset_dir, 'annotations.txt'), 'w', newline='') as f:
      # os.listdir() order is arbitrary; sorting keeps class ids reproducible
      for subfolder in sorted(os.listdir(dataset_dir)):
          if not subfolder.startswith('sequence_'):
              continue

          # Extract the sequence number from the subfolder name
          sequence_num = subfolder.split('_')[1]
          # Load the corresponding XML file from the annotation folder
          xml_file = os.path.join(annotation_dir, 'annotation_s{}.xml'.format(sequence_num))
          root = ET.parse(xml_file).getroot()

          for image in root.findall('images/image'):
              image_filename = image.attrib.get('file')
              boxes = []
              for box in image.findall('box'):
                  # Register the class label the first time it is seen
                  class_label = box.find('label').text
                  if class_label not in label_ids:
                      label_ids[class_label] = len(labels)
                      labels.append(class_label)

                  # The XML stores left/top/width/height; convert to corners
                  xmin = int(box.attrib.get('left'))
                  ymin = int(box.attrib.get('top'))
                  xmax = xmin + int(box.attrib.get('width'))
                  ymax = ymin + int(box.attrib.get('height'))
                  boxes.append(','.join(str(v) for v in (xmin, ymin, xmax, ymax, label_ids[class_label])))

              # One line per image: path followed by its space-separated boxes
              f.write(' '.join([dataset_dir+'/'+subfolder+'/'+image_filename, ' '.join(boxes)]) + '\n')

  with open(os.path.join(dataset_dir, 'classes.txt'), 'w') as l:
      for label in labels:
          l.write(label+'\n')

In [None]:
# Dataset and annotation locations (absolute paths; presumably a Colab Google Drive mount — adjust for other environments).
# VOCtoCSV writes annotations.txt and classes.txt into dataset_dir.
dataset_dir = '/content/drive/MyDrive/RabbitHole/data'
annotation_dir = '/content/drive/MyDrive/RabbitHole/data/annotation'

VOCtoCSV(dataset_dir, annotation_dir)

In [None]:
def plot_show_image_with_boxes(image_path, boxes_list):
  """
  Plot and display an image with bounding boxes.
  Args:
  image_path : str : path to the image
  boxes_list : List[str] : List of strings with each string containing bounding box coordinates in the format 'xmin,ymin,xmax,ymax,classid'.
                           Surrounding whitespace (e.g. a trailing newline) is ignored and blank entries are skipped.
  Returns:
  None
  Raises:
  FileNotFoundError : if the image cannot be read from image_path
  Outputs:
  Image is displayed with bounding boxes drawn on it
  """
  # cv2.imread returns None instead of raising when the file cannot be read
  image = cv2.imread(image_path)
  if image is None:
    raise FileNotFoundError(f'could not read image: {image_path}')
  # OpenCV loads BGR, matplotlib expects RGB
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

  # plot boxes
  for box in boxes_list:
    box = box.strip()
    if not box:
      # an annotation line without boxes yields an empty / newline-only field
      continue
    xmin, ymin, xmax, ymax, classid = box.split(',')
    image = cv2.rectangle(image,(int(xmin),int(ymin)),(int(xmax),int(ymax)),(255,0,0), 5)

  plt.imshow(image)
  plt.show()

In [None]:
IMAGE_PATH = '/content/drive/MyDrive/RabbitHole/data/sequence_1/frame_s1_1.jpg'
ANNOTATIONS_PATH = '/content/drive/MyDrive/RabbitHole/data/annotations.txt'

# Find the annotation line(s) of IMAGE_PATH and draw their boxes.
with open(ANNOTATIONS_PATH) as f:
  for line in f:
    # split() without an argument also drops the trailing newline and any
    # empty field, so an image without boxes yields an empty box list
    fields = line.split()

    if fields and fields[0] == IMAGE_PATH:
      plot_show_image_with_boxes(image_path=fields[0],
                                 boxes_list=fields[1:])

In [None]:
# Number of annotated boxes per class id (keys are the class ids as strings).
class_counts = {}

with open('/content/drive/MyDrive/RabbitHole/data/annotations.txt', 'r') as f:
    reader = csv.reader(f, delimiter=' ')
    for row in reader:
        # row[0] is the image path, the remaining fields are 'xmin,ymin,xmax,ymax,class_id'
        for box in row[1:]:
            if not box:
                # an image line without boxes ends in a space and yields an empty field
                continue
            class_id = box.split(',')[-1]
            class_counts[class_id] = class_counts.get(class_id, 0) + 1

print(class_counts)

In [None]:
def train_valid_split(file_path, output_path, split_percentage=0.8, shuffle_flag=True, seed=None):
  """
  Split a file into train and validation sets.
  Args:
  file_path : str : path to the file that needs to be split
  output_path : str : path to the directory where the train and validation files will be saved
  split_percentage : float : percentage of data to be used for training, default value is 0.8
  shuffle_flag : bool : flag to shuffle the data before splitting, default value is True
  seed : int or None : when given, the shuffle uses a private random.Random(seed) so the
                       split is reproducible; None (default) uses the global random state
  Returns:
  None
  Outputs:
  train.txt : file containing training data
  valid.txt : file containing validation data
  """
  with open(file_path, 'r') as f:
      lines = f.readlines()

  # A final line without a newline would be glued to its neighbour once the
  # lines are shuffled, so terminate it first.
  if lines and not lines[-1].endswith('\n'):
      lines[-1] += '\n'

  if shuffle_flag:
      shuffler = random if seed is None else random.Random(seed)
      shuffler.shuffle(lines)

  # The first int(len * split_percentage) lines go to train, the rest to valid
  split_index = int(len(lines) * split_percentage)
  with open(os.path.join(output_path, 'train.txt'), 'w') as f:
      f.writelines(lines[:split_index])
  with open(os.path.join(output_path, 'valid.txt'), 'w') as f:
      f.writelines(lines[split_index:])

In [None]:
# Shuffle annotations.txt and write 80% of its lines to train.txt and the rest to valid.txt in the data directory.
train_valid_split('/content/drive/MyDrive/RabbitHole/data/annotations.txt',
                  '/content/drive/MyDrive/RabbitHole/data',
                  0.8, True)

Pré-treino: preparação do ambiente e das dependências antes do treinamento.

In [None]:
%cd /content/drive/MyDrive/RabbitHole/keras-YOLOv3-model-set

In [None]:
!apt install --allow-change-held-packages libcudnn8

In [None]:
%cd /content/drive/MyDrive/RabbitHole/keras-YOLOv3-model-set

In [None]:
!pip install onnxruntime

In [None]:
# Run the repository's train.py with the yolo3_mobilenet_lite model type, using the generated
# train.txt annotation file and classes.txt class list.
# NOTE(review): valid.txt written by train_valid_split is not passed to this command — confirm how train.py selects its validation data.
!python train.py --model_type=yolo3_mobilenet_lite \
--anchors_path=configs/yolo3_anchors.txt \
--annotation_file=/content/drive/MyDrive/RabbitHole/data/train.txt \
--classes_path=/content/drive/MyDrive/RabbitHole/data/classes.txt \
--eval_online \
--save_eval_checkpoint

In [None]:
!mkdir /content/drive/MyDrive/RabbitHole/output/yolo3_mobilenet_lite

In [None]:
!cp /content/drive/MyDrive/RabbitHole/keras-YOLOv3-model-set/logs/000/ep020-loss7.758-val_loss7.674-mAP62.063.h5 \
/content/drive/MyDrive/RabbitHole/output/yolo3_mobilenet_lite/trained_final.h5

In [None]:
%cd /content/drive/MyDrive/RabbitHole/keras-YOLOv3-model-set

In [None]:
# Run yolo.py with --dump_model on the trained checkpoint (trained_final.h5) at a 416x416 input shape;
# the result is written to the path given by --output_model_file.
# NOTE(review): presumably this exports an inference-ready model — confirm against the repository documentation.
!python yolo.py \
--model_type=yolo3_mobilenet_lite \
--weights_path=/content/drive/MyDrive/RabbitHole/output/yolo3_mobilenet_lite/trained_final.h5 \
--anchors_path=configs/yolo3_anchors.txt \
--classes_path=/content/drive/MyDrive/RabbitHole/data/classes.txt \
--model_input_shape=416x416 \
--dump_model \
--output_model_file=/content/drive/MyDrive/RabbitHole/output/yolo3_mobilenet_lite/yolo3_mobilenet_lite.h5

In [None]:
!pwd

In [None]:
!pip install --upgrade pip

In [None]:
!pip install onnxruntime
!pip install numpy==1.24.4

In [None]:
def load_eval_model(model_path):
    """
    Load a model for evaluation, choosing the loader from the file extension.
    # Arguments
        model_path: path of the model file, ending in '.tflite' or '.h5'.
    # Returns
        model: a TFLite interpreter with tensors already allocated ('.tflite')
            or a Keras model loaded with compile=False ('.h5').
        model_format: 'TFLITE' or 'H5'.
    # Raises
        ValueError: if the extension is neither '.tflite' nor '.h5'.
    """
    # support of tflite model
    if model_path.endswith('.tflite'):
        from tensorflow.lite.python import interpreter as interpreter_wrapper
        model = interpreter_wrapper.Interpreter(model_path=model_path)
        model.allocate_tensors()
        model_format = 'TFLITE'

    # normal keras h5 model
    elif model_path.endswith('.h5'):
        custom_object_dict = get_custom_objects()

        model = load_model(model_path, compile=False, custom_objects=custom_object_dict)
        model_format = 'H5'
        # set_learning_phase is not available on every Keras backend version;
        # only call it where it exists instead of failing after a successful load.
        if hasattr(K, 'set_learning_phase'):
            K.set_learning_phase(0)
    else:
        raise ValueError('invalid model file')

    return model, model_format

def get_custom_objects():
    '''
    Build the custom_objects mapping handed to Keras when a .h5 model is
    loaded or converted, so that the customized layers/functions stored
    in the model can be resolved by name.
    '''
    return dict(
        tf=tf,
        swish=swish,
        hard_sigmoid=hard_sigmoid,
        hard_swish=hard_swish,
        mish=mish,
    )

def swish(x):
    """Swish activation function.
    # Arguments
        x: Input tensor.
    # Returns
        The Swish activation: `x * sigmoid(x)`.
    # References
        [Searching for Activation Functions](https://arxiv.org/abs/1710.05941)
    """
    if K.backend() == 'tensorflow':
        try:
            # The native TF implementation has a more
            # memory-efficient gradient implementation.
            # It is reached through the `tf` module: the tf.keras backend
            # module has no `tf` attribute, so `K.tf...` never succeeded.
            return tf.nn.swish(x)
        except AttributeError:
            # TensorFlow build without tf.nn.swish: use the formula below
            pass

    return x * K.sigmoid(x)

def hard_sigmoid(x):
    """Hard sigmoid: shift by 3, apply a ReLU layer capped at 6, scale by 1/6."""
    shifted = x + 3.
    capped = ReLU(6.)(shifted)
    return capped * (1. / 6.)

def hard_swish(x):
    """Hard swish: element-wise product of hard_sigmoid(x) and x."""
    gate = Activation(hard_sigmoid)(x)
    return Multiply()([gate, x])

def mish(x):
    """Mish activation: x * tanh(softplus(x))."""
    smooth = K.softplus(x)
    return x * K.tanh(smooth)

In [None]:
def yolo_predict_tflite(interpreter, image, anchors, num_classes, conf_threshold):
    """
    Run one image through a TFLite interpreter and postprocess the raw
    outputs with the YOLOv3 numpy pipeline.
    # Arguments
        interpreter: TFLite interpreter object (tensors already allocated).
        image: PIL Image object containing the origin image.
        anchors: anchor array forwarded to yolo3_postprocess_np.
        num_classes: number of classes.
        conf_threshold: score threshold forwarded as `confidence`.
    # Returns
        pred_boxes, pred_classes, pred_scores as returned by yolo3_postprocess_np.
    Prints the wall-clock time of the inference and of the postprocessing.
    """
    input_info = interpreter.get_input_details()
    output_info = interpreter.get_output_details()

    # model input tensor shape is indexed as (batch, height, width, ...)
    input_tensor_shape = input_info[0]['shape']
    model_input_shape = (input_tensor_shape[1], input_tensor_shape[2])

    image_data = preprocess_image(image, model_input_shape)
    # origin image shape, in (height, width) format
    image_shape = image.size[::-1]

    interpreter.set_tensor(input_info[0]['index'], image_data)
    inference_start = time.time()
    interpreter.invoke()
    print('Time for inference:',time.time()-inference_start)

    # collect every output tensor of the model
    prediction = [interpreter.get_tensor(detail['index']) for detail in output_info]

    postprocess_start = time.time()
    boxes, classes, scores = yolo3_postprocess_np(prediction, image_shape, anchors, num_classes, model_input_shape, max_boxes=100, confidence=conf_threshold)
    print('Time to postprocess prediction',time.time()-postprocess_start)

    return boxes, classes, scores

In [None]:
import numpy as np

def expit(x):
    """
    Numerically stable logistic sigmoid, 1 / (1 + exp(-x)).

    exp() is only ever evaluated on non-positive values, so large negative
    inputs no longer overflow (the naive form computes exp(-x) directly).
    # Arguments
        x: scalar or array-like of numbers.
    # Returns
        sigmoid of x with the same shape; floating inputs keep their dtype,
        other inputs are computed in float64.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(np.float64)

    z = np.exp(-np.abs(x))  # always in (0, 1]
    # x >= 0: 1 / (1 + e^-x)      x < 0: e^x / (1 + e^x)
    sig = np.where(x >= 0, 1 / (1 + z), z / (1 + z))

    # [()] turns a 0-d result back into a scalar and leaves arrays untouched
    return sig[()]

In [None]:
#from scipy.special import expit

def preprocess_image(image, model_input_shape):
    """
    Turn an input image into a model-ready batch: letterbox resize,
    conversion to float32, normalization and a leading batch axis.
    # Arguments
        image: origin input image
            PIL Image object containing image data
        model_input_shape: model input image shape
            tuple of format (height, width).
    # Returns
        image_data: numpy array of image data for model input.
    """
    # letterbox_resize expects (width, height)
    target_size = model_input_shape[::-1]
    letterboxed = letterbox_resize(image, target_size)
    pixels = normalize_image(np.asarray(letterboxed).astype('float32'))
    # Add batch dimension.
    return np.expand_dims(pixels, 0)

def letterbox_resize(image, target_size, return_padding_info=False):
    """
    Fit an image into target_size without changing its aspect ratio; the
    resized image is pasted centered on a gray (128,128,128) canvas.
    # Arguments
        image: origin image to be resize
            PIL Image object containing image data
        target_size: target image size,
            tuple of format (width, height).
        return_padding_info: whether to return padding size & offset info
            Boolean flag to control return value
    # Returns
        new_image: resized PIL Image object.
        padding_size: size (width, height) of the resized image inside the
            canvas (only when return_padding_info is True).
        offset: top-left (dx, dy) position of the resized image inside the
            canvas (only when return_padding_info is True).
    """
    src_w, src_h = image.size
    target_w, target_h = target_size

    # the smaller ratio makes the whole image fit inside the target
    scale = min(target_w/src_w, target_h/src_h)
    padding_size = (int(src_w * scale), int(src_h * scale))

    # center the resized image on the canvas
    offset = ((target_w - padding_size[0])//2, (target_h - padding_size[1])//2)

    resized = image.resize(padding_size, Image.BICUBIC)
    canvas = Image.new('RGB', target_size, (128,128,128))
    canvas.paste(resized, offset)

    if not return_padding_info:
        return canvas
    return canvas, padding_size, offset

def normalize_image(image):
    """
    Scale pixel values by 1/255 after casting to float32.
    # Arguments
        image: origin input image
            numpy image array, values expected in 0 ~ 255
    # Returns
        image: numpy image array with dtype=float32, 0.0 ~ 1.0 for inputs in 0 ~ 255
    """
    as_float = image.astype(np.float32)
    return as_float / 255.0

def yolo3_postprocess_np(yolo_outputs, image_shape, anchors, num_classes, model_input_shape, max_boxes=100, confidence=0.1, iou_threshold=0.4, elim_grid_sense=False):
    """
    Full numpy postprocess of raw YOLOv3 outputs: decode, map back to the
    original image, threshold + NMS, then convert to corner format.
    # Arguments
        yolo_outputs: list of raw prediction arrays, one per output head
            (the list itself is not modified).
        image_shape: original image shape, (height, width).
        anchors: anchor array forwarded to yolo3_decode.
        num_classes: number of classes.
        model_input_shape: model input shape, (height, width).
        max_boxes: maximum number of boxes kept.
        confidence: score threshold.
        iou_threshold: IoU threshold used by NMS.
        elim_grid_sense: forwarded to yolo3_decode.
    # Returns
        boxes: boxes in (xmin, ymin, xmax, ymax) format as produced by
            yolo_adjust_boxes.
        classes, scores: as returned by yolo_handle_predictions.
    """
    # here we sort the prediction tensor list with grid size (e.g. 19/38/76)
    # to make sure it matches with anchors order; sorted() works on a copy so
    # the caller's list keeps its order
    ordered_outputs = sorted(yolo_outputs, key=lambda x: x.shape[1])

    predictions = yolo3_decode(ordered_outputs, anchors, num_classes, input_shape=model_input_shape, elim_grid_sense=elim_grid_sense)
    predictions = yolo_correct_boxes(predictions, image_shape, model_input_shape)

    boxes, classes, scores = yolo_handle_predictions(predictions,
                                                     image_shape,
                                                     num_classes,
                                                     max_boxes=max_boxes,
                                                     confidence=confidence,
                                                     iou_threshold=iou_threshold)

    print(classes, scores)

    boxes = yolo_adjust_boxes(boxes, image_shape)
    print(boxes)

    return boxes, classes, scores

def yolo3_decode(predictions, anchors, num_classes, input_shape, elim_grid_sense=False):
    """
    Decode every YOLOv3 output head with its own anchor subset and
    concatenate the results along the box axis.
    :param predictions: list of 3 (YOLOv3) or 2 (YOLOv3-tiny) raw prediction
        arrays, ordered to match the anchor masks used below
    :param anchors: anchor array, indexable with a list of row indices;
        must hold 3 anchors per prediction head
    :param num_classes: Total number of classes
    :param input_shape: Input shape of the model, forwarded to yolo_decode
    :param elim_grid_sense: when True, a per-head scale_x_y factor is passed
        to yolo_decode instead of None
    :return: decoded predictions of all heads concatenated on axis 1
    """
    num_heads = len(predictions)
    print(f"predictions length: {len(predictions)}, anchors length: {len(anchors)//3}")
    assert num_heads == len(anchors)//3, 'anchor numbers does not match prediction.'

    if num_heads == 3:
        # assume 3 set of predictions is YOLOv3
        anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
        grid_scales = [1.05, 1.1, 1.2]
    elif num_heads == 2:
        # 2 set of predictions is YOLOv3-tiny
        anchor_mask = [[3,4,5], [0,1,2]]
        grid_scales = [1.05, 1.05]
    else:
        raise ValueError('Unsupported prediction length: {}'.format(len(predictions)))

    scale_x_y = grid_scales if elim_grid_sense else [None] * num_heads

    decoded = [
        yolo_decode(head, anchors[mask], num_classes, input_shape, scale_x_y=head_scale, use_softmax=False)
        for head, mask, head_scale in zip(predictions, anchor_mask, scale_x_y)
    ]

    return np.concatenate(decoded, axis=1)

def yolo_decode(prediction, anchors, num_classes, input_shape, scale_x_y=None, use_softmax=False):
    '''Decode final layer features to bounding box parameters.

    # Arguments
        prediction: raw head output; axis 0 is the batch, axes 1 and 2 are
            the grid (height, width), and the remaining data is reshaped to
            num_anchors * (num_classes + 5) values per grid cell.
        anchors: array of the (width, height) anchors used by this head.
        num_classes: number of classes.
        input_shape: model input shape, (height, width).
        scale_x_y: optional "eliminate grid sensitivity" factor applied to
            the sigmoid of the xy offsets; None/0 disables it.
        use_softmax: use softmax instead of sigmoid for the class scores.
    # Returns
        array of shape (batch, grid_h * grid_w * num_anchors, num_classes + 5)
        holding box_xy, box_wh (both relative to the model input),
        objectness and class scores.
    '''
    batch_size = np.shape(prediction)[0]
    num_anchors = len(anchors)

    grid_shape = np.shape(prediction)[1:3]
    #check if stride on height & width are same
    assert input_shape[0]//grid_shape[0] == input_shape[1]//grid_shape[1], 'model stride mismatch.'

    prediction = np.reshape(prediction,
                            (batch_size, grid_shape[0] * grid_shape[1] * num_anchors, num_classes + 5))

    ################################
    # generate x_y_offset grid map: one (x, y) cell index per anchor per cell
    grid_y = np.arange(grid_shape[0])
    grid_x = np.arange(grid_shape[1])
    x_offset, y_offset = np.meshgrid(grid_x, grid_y)

    x_offset = np.reshape(x_offset, (-1, 1))
    y_offset = np.reshape(y_offset, (-1, 1))

    x_y_offset = np.concatenate((x_offset, y_offset), axis=1)
    x_y_offset = np.tile(x_y_offset, (1, num_anchors))
    x_y_offset = np.reshape(x_y_offset, (-1, 2))
    x_y_offset = np.expand_dims(x_y_offset, 0)

    ################################

    # Log space transform of the height and width:
    # repeat the anchors for every grid cell so they line up with prediction
    anchors = np.tile(anchors, (grid_shape[0] * grid_shape[1], 1))
    anchors = np.expand_dims(anchors, 0)

    if scale_x_y:
        # Eliminate grid sensitivity trick involved in YOLOv4
        #
        # Reference Paper & code:
        #     "YOLOv4: Optimal Speed and Accuracy of Object Detection"
        #     https://arxiv.org/abs/2004.10934
        #     https://github.com/opencv/opencv/issues/17148
        #
        box_xy_tmp = expit(prediction[..., :2]) * scale_x_y - (scale_x_y - 1) / 2
        box_xy = (box_xy_tmp + x_y_offset) / np.array(grid_shape)[::-1]
    else:
        box_xy = (expit(prediction[..., :2]) + x_y_offset) / np.array(grid_shape)[::-1]
    box_wh = (np.exp(prediction[..., 2:4]) * anchors) / np.array(input_shape)[::-1]

    # Sigmoid objectness scores
    objectness = expit(prediction[..., 4])  # p_o (objectness score)
    objectness = np.expand_dims(objectness, -1)  # To make the same number of values for axis 0 and 1

    if use_softmax:
        # Softmax class scores, computed inline (no softmax helper is in
        # scope here); subtracting the row maximum keeps exp() from overflowing
        logits = prediction[..., 5:]
        shifted = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
        class_scores = shifted / np.sum(shifted, axis=-1, keepdims=True)
    else:
        # Sigmoid class scores
        class_scores = expit(prediction[..., 5:])

    return np.concatenate([box_xy, box_wh, objectness, class_scores], axis=2)

def yolo_correct_boxes(predictions, img_shape, model_input_shape):
    '''rescale predicition boxes back to original image shape

    Undoes the letterbox padding/scaling and converts the boxes from
    center-based values relative to the model input into top-left based
    (x, y, w, h) in pixels of the original image. `predictions` is not
    modified.

    # Arguments
        predictions: array indexed as [..., 0:2]=box xy, [..., 2:4]=box wh,
            [..., 4]=objectness, [..., 5:]=class scores.
        img_shape: original image shape, (height, width).
        model_input_shape: model input shape, (height, width).
    # Returns
        new array with the same layout as `predictions` and corrected boxes.
    '''
    box_xy = predictions[..., :2]
    box_wh = predictions[..., 2:4]
    objectness = np.expand_dims(predictions[..., 4], -1)
    class_scores = predictions[..., 5:]

    # model_input_shape & image_shape should be (height, width) format
    model_input_shape = np.array(model_input_shape, dtype='float32')
    image_shape = np.array(img_shape, dtype='float32')

    # size of the image inside the letterboxed model input
    new_shape = np.round(image_shape * np.min(model_input_shape/image_shape))
    offset = (model_input_shape-new_shape)/2./model_input_shape
    scale = model_input_shape/new_shape
    # reverse offset/scale to match (w,h) order
    offset = offset[..., ::-1]
    scale = scale[..., ::-1]

    # box_xy / box_wh are views into `predictions`: build new arrays instead
    # of using in-place operators so the caller's data stays untouched
    box_xy = (box_xy - offset) * scale
    box_wh = box_wh * scale

    # Convert centoids to top left coordinates
    box_xy = box_xy - box_wh / 2

    # Scale boxes back to original image shape.
    image_wh = image_shape[..., ::-1]
    box_xy = box_xy * image_wh
    box_wh = box_wh * image_wh

    return np.concatenate([box_xy, box_wh, objectness, class_scores], axis=2)

def yolo_handle_predictions(predictions, image_shape, num_classes, max_boxes=5, confidence=0.1, iou_threshold=0.4, use_cluster_nms=False, use_wbf=False):
    """
    Threshold decoded predictions by score, run the selected box
    postprocess (NMS by default) and keep the best `max_boxes` results.
    # Arguments
        predictions: array indexed as [:, :, 0:4]=boxes, [:, :, 4]=objectness,
            [:, :, 5:]=class scores.
        image_shape: original image shape, only forwarded to weighted_boxes_fusion.
        num_classes: number of classes; with a single class the objectness
            alone is used as score.
        max_boxes: maximum number of boxes returned.
        confidence: minimum score for a box to be kept.
        iou_threshold: IoU threshold forwarded to the box postprocess.
        use_cluster_nms: use fast_cluster_nms_boxes instead of nms_boxes.
        use_wbf: use weighted_boxes_fusion instead of nms_boxes
            (ignored when use_cluster_nms is set).
    # Returns
        boxes, classes, scores sorted by descending score, or three empty
        lists when the box postprocess returns nothing.
    """
    boxes = predictions[:, :, :4]
    box_confidences = np.expand_dims(predictions[:, :, 4], -1)
    box_class_probs = predictions[:, :, 5:]

    # check if only 1 class for different score
    if num_classes == 1:
        box_scores = box_confidences
    else:
        box_scores = box_confidences * box_class_probs

    # filter boxes with score threshold
    box_classes = np.argmax(box_scores, axis=-1)
    box_class_scores = np.max(box_scores, axis=-1)
    pos = np.where(box_class_scores >= confidence)

    boxes = boxes[pos]
    classes = box_classes[pos]
    scores = box_class_scores[pos]

    if use_cluster_nms:
        # use Fast/Cluster NMS for boxes postprocess
        n_boxes, n_classes, n_scores = fast_cluster_nms_boxes(boxes, classes, scores, iou_threshold, confidence=confidence)
    elif use_wbf:
        # use Weighted-Boxes-Fusion for boxes postprocess
        n_boxes, n_classes, n_scores = weighted_boxes_fusion([boxes], [classes], [scores], image_shape, weights=None, iou_thr=iou_threshold)
    else:
        # Boxes, Classes and Scores returned from NMS
        n_boxes, n_classes, n_scores = nms_boxes(boxes, classes, scores, iou_threshold, confidence=confidence)

    if n_boxes:
        boxes = np.concatenate(n_boxes)
        classes = np.concatenate(n_classes).astype('int32')
        scores = np.concatenate(n_scores)
        boxes, classes, scores = filter_boxes(boxes, classes, scores, max_boxes)

        return boxes, classes, scores

    # Nothing came back from the box postprocess: return an explicit empty
    # result so callers can always unpack three values (was an implicit None).
    return [], [], []

def yolo_adjust_boxes(boxes, img_shape):
    '''
    Convert boxes from top-left (x,y,w,h) to integer
    (xmin,ymin,xmax,ymax), rounding half up and clamping to
    [0, width] / [0, height] of the image.
    Returns an empty list when there are no boxes, otherwise an
    int32 array with one row per box.
    '''
    if boxes is None or len(boxes) == 0:
        return []

    # img_shape is (height, width)
    img_h, img_w = np.array(img_shape, dtype='float32')

    def _round_half_up(value):
        return np.floor(value + 0.5).astype('int32')

    corners = []
    for left, top, box_w, box_h in boxes:
        right = left + box_w
        bottom = top + box_h
        corners.append([
            max(0, _round_half_up(left)),
            max(0, _round_half_up(top)),
            min(img_w, _round_half_up(right)),
            min(img_h, _round_half_up(bottom)),
        ])

    return np.array(corners,dtype=np.int32)

def filter_boxes(boxes, classes, scores, max_boxes):
    '''
    Keep the "max_boxes" highest scoring predictions,
    returned in descending score order.
    '''
    # indices of the best scores first, truncated to max_boxes
    top_indices = np.argsort(scores)[::-1][:max_boxes]

    return boxes[top_indices], classes[top_indices], scores[top_indices]

def box_iou(boxes):
    """
    IoU of the first box against every other box of the array
    Parameters
    ----------
    boxes: bbox numpy array, shape=(N, 4), xywh
           x,y are top left coordinates
    Returns
    -------
    iou: numpy array, shape=(N-1,)
         IoU value of boxes[1:] with boxes[0]
    """
    left, top = boxes[:, 0], boxes[:, 1]
    width, height = boxes[:, 2], boxes[:, 3]
    right = left + width
    bottom = top + height
    areas = width * height

    # overlap extent of boxes[1:] with boxes[0], clamped at zero.
    # NOTE(review): the +1 is applied to the overlap extent but not to the
    # box areas, so identical boxes give a value above 1.
    overlap_w = np.maximum(0.0, np.minimum(right[1:], right[0]) - np.maximum(left[1:], left[0]) + 1)
    overlap_h = np.maximum(0.0, np.minimum(bottom[1:], bottom[0]) - np.maximum(top[1:], top[0]) + 1)

    inter = overlap_w * overlap_h
    union = areas[1:] + areas[0] - inter
    return inter / union

def box_diou(boxes):
    """
    Calculate DIoU value of 1st box with other boxes of a box array
    Reference Paper:
        "Distance-IoU Loss: Faster and Better Learning for Bounding Box Regression"
        https://arxiv.org/abs/1911.08287
    Parameters
    ----------
    boxes: bbox numpy array, shape=(N, 4), xywh
           x,y are top left coordinates
    Returns
    -------
    diou: numpy array, shape=(N-1,)
         DIoU value of boxes[1:] with boxes[0]:
         IoU minus (squared center distance / squared enclosing-box diagonal)
    """
    # get box coordinate and area
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2]
    h = boxes[:, 3]
    areas = w * h

    # check IoU
    inter_xmin = np.maximum(x[1:], x[0])
    inter_ymin = np.maximum(y[1:], y[0])
    inter_xmax = np.minimum(x[1:] + w[1:], x[0] + w[0])
    inter_ymax = np.minimum(y[1:] + h[1:], y[0] + h[0])

    inter_w = np.maximum(0.0, inter_xmax - inter_xmin + 1)
    inter_h = np.maximum(0.0, inter_ymax - inter_ymin + 1)

    inter = inter_w * inter_h
    iou = inter / (areas[1:] + areas[0] - inter)

    # box center distance (squared)
    x_center = x + w/2
    y_center = y + h/2
    center_distance = np.power(x_center[1:] - x_center[0], 2) + np.power(y_center[1:] - y_center[0], 2)

    # get enclosed area
    enclose_xmin = np.minimum(x[1:], x[0])
    enclose_ymin = np.minimum(y[1:], y[0])
    enclose_xmax = np.maximum(x[1:] + w[1:], x[0] + w[0])
    # bottom edge of the enclosing box: must be built from y/h (it used to
    # repeat the x/w expression, which gave a wrong enclosing height)
    enclose_ymax = np.maximum(y[1:] + h[1:], y[0] + h[0])
    enclose_w = np.maximum(0.0, enclose_xmax - enclose_xmin + 1)
    enclose_h = np.maximum(0.0, enclose_ymax - enclose_ymin + 1)
    # get enclosed diagonal distance (squared)
    enclose_diagonal = np.power(enclose_w, 2) + np.power(enclose_h, 2)
    # calculate DIoU, add epsilon in denominator to avoid dividing by 0
    diou = iou - 1.0 * (center_distance) / (enclose_diagonal + np.finfo(float).eps)

    return diou

def nms_boxes(boxes, classes, scores, iou_threshold, confidence=0.1, use_diou=True, is_soft=False, use_exp=False, sigma=0.5):
    """
    Per-class non-maximum suppression (hard NMS by default, optional Soft-NMS).

    For every distinct class the highest scoring box is repeatedly picked,
    and the remaining boxes of that class are either dropped (hard NMS) or
    have their score reduced (Soft-NMS) according to their overlap with it.

    # Arguments
        boxes: numpy array of boxes passed to box_iou / box_diou, which
            document the layout as shape (N, 4), xywh with top-left x,y.
        classes: numpy array with the class id of every box.
        scores: numpy array with the score of every box.
        iou_threshold: overlap above which a box is suppressed (hard NMS)
            or depressed (linear Soft-NMS).
        confidence: minimum score a box must keep to survive Soft-NMS.
        use_diou: measure overlap with box_diou instead of box_iou.
        is_soft: use Soft-NMS instead of hard NMS.
        use_exp: with Soft-NMS, use the exponential score decay.
        sigma: divisor in the exponential Soft-NMS decay.
    # Returns
        nboxes, nclasses, nscores: three one-element lists, each holding a
        numpy array built from all kept boxes / classes / scores.
    """
    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        # handle data for one class
        # (note: `c` is rebound below from the class id to the class array slice)
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]

        # make a data copy to avoid breaking
        # during nms operation
        b_nms = copy.deepcopy(b)
        c_nms = copy.deepcopy(c)
        s_nms = copy.deepcopy(s)

        while len(s_nms) > 0:
            # pick the max box and store, here
            # we also use copy to persist result
            i = np.argmax(s_nms, axis=-1)
            nboxes.append(copy.deepcopy(b_nms[i]))
            nclasses.append(copy.deepcopy(c_nms[i]))
            nscores.append(copy.deepcopy(s_nms[i]))

            # swap the max line and first line, so the picked box sits at
            # index 0 as box_iou / box_diou expect
            b_nms[[i,0],:] = b_nms[[0,i],:]
            c_nms[[i,0]] = c_nms[[0,i]]
            s_nms[[i,0]] = s_nms[[0,i]]

            # overlap of the picked box (index 0) with every other box
            if use_diou:
                iou = box_diou(b_nms)
                #iou = box_diou_matrix(b_nms, b_nms)[0][1:]
            else:
                iou = box_iou(b_nms)
                #iou = box_iou_matrix(b_nms, b_nms)[0][1:]

            # drop the first line (the picked box) since it has been recorded;
            # after this the arrays line up with `iou`
            b_nms = b_nms[1:]
            c_nms = c_nms[1:]
            s_nms = s_nms[1:]

            if is_soft:
                # Soft-NMS
                if use_exp:
                    # score refresh formula:
                    # score = score * exp(-(iou^2)/sigma)
                    s_nms = s_nms * np.exp(-(iou * iou) / sigma)
                else:
                    # score refresh formula:
                    # score = score * (1 - iou) if iou > threshold
                    depress_mask = np.where(iou > iou_threshold)[0]
                    s_nms[depress_mask] = s_nms[depress_mask]*(1-iou[depress_mask])
                keep_mask = np.where(s_nms >= confidence)[0]
            else:
                # normal Hard-NMS
                keep_mask = np.where(iou <= iou_threshold)[0]

            # keep needed box for next loop
            b_nms = b_nms[keep_mask]
            c_nms = c_nms[keep_mask]
            s_nms = s_nms[keep_mask]

    # reformat result for output
    nboxes = [np.array(nboxes)]
    nclasses = [np.array(nclasses)]
    nscores = [np.array(nscores)]
    return nboxes, nclasses, nscores

def get_weighted_box(boxes, conf_type='avg'):
    """
    Fuse a set of boxes into one confidence-weighted box
    :param boxes: set of boxes to fuse; each box is an array whose element 0
        is copied from the first box, element 1 is the confidence and
        elements 2: are the values that get averaged
    :param conf_type: type of confidence one of 'avg' or 'max'
    :return: weighted box, float32 array of length 6
    """
    fused = np.zeros(6, dtype=np.float32)
    confidences = []
    for member in boxes:
        # accumulate the confidence-weighted values
        fused[2:] += (member[1] * member[2:])
        confidences.append(member[1])
    total_conf = sum(confidences)

    fused[0] = boxes[0][0]
    if conf_type == 'avg':
        fused[1] = total_conf / len(boxes)
    elif conf_type == 'max':
        fused[1] = np.array(confidences).max()
    # normalize the weighted sum by the total confidence
    fused[2:] /= total_conf
    return fused

def bb_intersection_over_union(A, B) -> float:
    """IoU of two boxes given as (x1, y1, x2, y2); 0.0 when they do not overlap."""
    # extent of the intersection rectangle, clamped at zero
    overlap_w = max(0, min(A[2], B[2]) - max(A[0], B[0]))
    overlap_h = max(0, min(A[3], B[3]) - max(A[1], B[1]))
    interArea = overlap_w * overlap_h

    if interArea == 0:
        return 0.0

    # areas of the two input rectangles
    area_a = (A[2] - A[0]) * (A[3] - A[1])
    area_b = (B[2] - B[0]) * (B[3] - B[1])

    return interArea / float(area_a + area_b - interArea)

def find_matching_box(boxes_list, new_box, match_iou):
    """
    Find the box of boxes_list that overlaps new_box the most.

    Only boxes whose element 0 equals new_box[0] are compared; the overlap
    is bb_intersection_over_union of elements 2: of both boxes.
    Returns (index, iou) of the best candidate whose IoU is strictly above
    match_iou, or (-1, match_iou) when there is none.
    """
    best_index, best_iou = -1, match_iou
    for idx, candidate in enumerate(boxes_list):
        if candidate[0] == new_box[0]:
            overlap = bb_intersection_over_union(candidate[2:], new_box[2:])
            if overlap > best_iou:
                best_index, best_iou = idx, overlap

    return best_index, best_iou

def prefilter_boxes(boxes, scores, labels, image_shape, weights, thr):
    """
    Group the raw predictions of every model by label, ready for fusion.

    :param boxes: per-model sequences of boxes in raw (x, y, w, h) format
    :param scores: per-model sequences of confidence scores
    :param labels: per-model sequences of class labels
    :param image_shape: (height, width); x/w values are divided by width and
                        y/h values by height to normalise the boxes
    :param weights: per-model weight multiplied into every score of that model
    :param thr: boxes scoring below this value are dropped
    :return: dict mapping label -> array of shape (n, 6) whose rows are
             [label, weighted score, x1, y1, x2, y2]. Coordinates are
             normalised corners (x1/y1 clipped at 0, x2/y2 clipped at 1) and
             the rows are sorted by weighted score, highest first.
    :raises ValueError: if a model's boxes, scores and labels differ in length
    """
    # Imported locally so the degenerate-box warning below cannot fail on a
    # missing module-level import.
    import warnings

    # Create dict with boxes stored by its label
    new_boxes = dict()
    height, width = image_shape

    for t in range(len(boxes)):

        # Raise instead of print + exit(): exit() is an interactive-shell
        # helper and is not a reliable way to abort from library code.
        if len(boxes[t]) != len(scores[t]):
            raise ValueError('Error. Length of boxes arrays not equal to length of scores array: {} != {}'.format(len(boxes[t]), len(scores[t])))

        if len(boxes[t]) != len(labels[t]):
            raise ValueError('Error. Length of boxes arrays not equal to length of labels array: {} != {}'.format(len(boxes[t]), len(labels[t])))

        for j in range(len(boxes[t])):
            score = scores[t][j]
            if score < thr:
                continue
            label = int(labels[t][j])
            box_part = boxes[t][j]

            # input boxes in raw (x,y,w,h) format,
            # convert to normalized (x1,y1,x2,y2)
            x1 = max(0, float(box_part[0]) / width)
            y1 = max(0, float(box_part[1]) / height)
            x2 = min(1, float(box_part[2] + box_part[0]) / width)
            y2 = min(1, float(box_part[3] + box_part[1]) / height)

            # Skip boxes with no positive extent after clipping. Comparing the
            # corners (rather than testing area == 0) also rejects boxes lying
            # outside the image, whose two negative extents would otherwise
            # multiply into a positive "area".
            if x2 <= x1 or y2 <= y1:
                warnings.warn("Zero area box skipped: {}.".format(box_part))
                continue

            new_boxes.setdefault(label, []).append(
                [label, float(score) * weights[t], x1, y1, x2, y2]
            )

    # Sort each list in dict by score and transform it to numpy array
    for k in new_boxes:
        current_boxes = np.array(new_boxes[k])
        new_boxes[k] = current_boxes[current_boxes[:, 1].argsort()[::-1]]

    return new_boxes

def weighted_boxes_fusion(boxes_list, labels_list, scores_list, image_shape, weights=None, iou_thr=0.55, skip_box_thr=0.0, conf_type='avg', allows_overflow=False):
    '''
    Fuse the detections of one or more models with Weighted Boxes Fusion.

    :param boxes_list: list of boxes predictions from each model, each box is 4 numbers.
    It has 3 dimensions (models_number, model_preds, 4)
    Order of boxes: raw (x, y, w, h). They are normalized internally by
    dividing by the width/height given in image_shape.
    :param labels_list: list of labels for each model
    :param scores_list: list of scores for each model
    :param image_shape: (height, width) used to normalize the input boxes and
    to scale the fused boxes back
    :param weights: list of weights for each model. Default: None, which means weight == 1 for each model
    :param iou_thr: IoU value for boxes to be a match
    :param skip_box_thr: exclude boxes with score lower than this variable
    :param conf_type: how to calculate confidence in weighted boxes. 'avg': average value, 'max': maximum value
    :param allows_overflow: false if we want confidence score not exceed 1.0
    :return: ([boxes], [labels], [scores]), each a one-element list holding an
    array sorted by descending score. boxes are (x, y, w, h) scaled back with
    image_shape. NOTE: when no box survives the pre-filter, three bare empty
    arrays (not wrapped in lists) are returned instead.
    :raises ValueError: if conf_type is neither 'avg' nor 'max'
    '''

    if weights is None:
        weights = np.ones(len(boxes_list))
    if len(weights) != len(boxes_list):
        print('Warning: incorrect number of weights {}. Must be: {}. Set weights equal to 1.'.format(len(weights), len(boxes_list)))
        weights = np.ones(len(boxes_list))
    weights = np.array(weights)

    # Raise instead of print + exit(): exit() is an interactive-shell helper
    # and is not a reliable way to abort from library code.
    if conf_type not in ['avg', 'max']:
        raise ValueError('Unknown conf_type: {}. Must be "avg" or "max"'.format(conf_type))

    image_shape = np.array(image_shape, dtype='float32')
    filtered_boxes = prefilter_boxes(boxes_list, scores_list, labels_list, image_shape, weights, skip_box_thr)
    if len(filtered_boxes) == 0:
        return np.zeros((0, 4)), np.zeros((0,)), np.zeros((0,))

    overall_boxes = []
    for label in filtered_boxes:
        boxes = filtered_boxes[label]
        new_boxes = []       # clusters: one list of member boxes per fused box
        weighted_boxes = []  # current fused box of each cluster

        # Clusterize boxes
        for j in range(0, len(boxes)):
            index, _ = find_matching_box(weighted_boxes, boxes[j], iou_thr)
            if index != -1:
                new_boxes[index].append(boxes[j])
                weighted_boxes[index] = get_weighted_box(new_boxes[index], conf_type)
            else:
                new_boxes.append([boxes[j].copy()])
                weighted_boxes.append(boxes[j].copy())

        # Rescale confidence based on number of models and boxes
        for i in range(len(new_boxes)):
            if not allows_overflow:
                weighted_boxes[i][1] = weighted_boxes[i][1] * min(weights.sum(), len(new_boxes[i])) / weights.sum()
            else:
                weighted_boxes[i][1] = weighted_boxes[i][1] * len(new_boxes[i]) / weights.sum()
        overall_boxes.append(np.array(weighted_boxes))

    overall_boxes = np.concatenate(overall_boxes, axis=0)
    overall_boxes = overall_boxes[overall_boxes[:, 1].argsort()[::-1]]
    boxes = overall_boxes[:, 2:]
    scores = overall_boxes[:, 1]
    labels = overall_boxes[:, 0]

    # convert boxes back to (x,y,w,h)
    boxes[..., 2:] = boxes[..., 2:] - boxes[..., :2]
    # Scale boxes back to original image shape.
    image_wh = image_shape[..., ::-1]
    boxes[..., :2] *= image_wh
    boxes[..., 2:] *= image_wh

    return [boxes], [labels], [scores]

def fast_cluster_nms_boxes(boxes, classes, scores, iou_threshold, confidence=0.1, use_cluster=True, use_diou=True, use_weighted=True, use_matrix_nms=False, use_spm=False):
    """
    Fast NMS/Cluster NMS/Matrix NMS bbox post process, applied per class.

    Reference Paper:
        1. "YOLACT: Real-time Instance Segmentation"
           https://arxiv.org/abs/1904.02689
        2. "Enhancing Geometric Factors in Model Learning and Inference for Object Detection and Instance Segmentation"
           https://arxiv.org/abs/2005.03572
        3. "SOLOv2: Dynamic, Faster and Stronger"
           https://arxiv.org/abs/2003.10152
        4. Blogpost on zhihu:
           https://zhuanlan.zhihu.com/p/157900024
    Parameters
    ----------
    boxes:   bbox numpy array, shape=(N, 4), xywh
             x,y are top left coordinates
    classes: bbox class index numpy array; iterated with set(), so it is
             expected to be 1-D, shape=(N,)
    scores:  bbox score numpy array, indexed like classes, shape=(N,)
    iou_threshold: overlap value from which a box counts as overlapped by a
             higher-scored box of the same class
    confidence: score threshold applied to the decayed/penalized scores;
             only used by the Matrix NMS and SPM branches
    use_cluster: iterate the suppression matrix (at most 200 rounds) until it
             stops changing
    use_diou: build the overlap matrix with box_diou_matrix instead of
             box_iou_matrix
    use_weighted: replace every box by a weighted average of itself and the
             boxes in its row of the final matrix whose value exceeds
             iou_threshold (weight = matrix value * box score); done before
             the keep mask is applied, whichever filtering branch was used
    use_matrix_nms: filter with Matrix NMS (gaussian score decay)
    use_spm: filter with the Score Penalty Mechanism; only takes effect when
             use_matrix_nms is False
    Returns
    -------
    Each return value is a one-element list wrapping the merged array of all
    classes:
    nboxes:   [NMSed bbox numpy array, shape=(K, 4), xywh,
              x,y are top left coordinates]
    nclasses: [NMSed bbox class index numpy array, shape=(K,)]
    nscores:  [NMSed bbox score numpy array, shape=(K,)]; these are the
              original scores, not the decayed/penalized ones
    """
    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        # handle data for one class
        # (note: the loop variable c is rebound below to the label array of this class)
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]

        # make a data copy to avoid breaking
        # during nms operation
        b_nms = copy.deepcopy(b)
        c_nms = copy.deepcopy(c)
        s_nms = copy.deepcopy(s)

        # sort boxes by score in descending order (argsort, then reversed)
        sorted_indices = np.argsort(s_nms)
        sorted_indices = sorted_indices[::-1]
        b_nms = b_nms[sorted_indices]
        c_nms = c_nms[sorted_indices]
        s_nms = s_nms[sorted_indices]

        # number of boxes for one class
        num_boxes = b_nms.shape[0]

        # get IoU/DIoU matrix (upper triangular matrix)
        if use_diou:
            iou_matrix = box_diou_matrix(b_nms, b_nms)
        else:
            iou_matrix = box_iou_matrix(b_nms, b_nms)
        # keep only entries [i, j] with i < j, i.e. the overlap of box j with
        # each higher-scored box i; max_iou[j] is the largest such overlap
        iou_matrix = np.triu(iou_matrix, k=1)
        max_iou = np.max(iou_matrix, axis=0)
        updated_iou_matrix = copy.deepcopy(iou_matrix)

        # Cluster loop: zero the rows of boxes that are themselves overlapped
        # (max_iou >= iou_threshold) and repeat until the matrix is stable
        if use_cluster:
            for i in range(200):
                prev_iou_matrix = copy.deepcopy(updated_iou_matrix)
                max_iou = np.max(prev_iou_matrix, axis=0)
                keep_diag = np.diag((max_iou < iou_threshold).astype(np.float32))
                updated_iou_matrix = np.dot(keep_diag, iou_matrix)
                if (prev_iou_matrix == updated_iou_matrix).all():
                    break

        if use_matrix_nms:
            # Matrix NMS
            max_iou_expand = np.tile(max_iou, (num_boxes, 1)).T  #(num_boxes)x(num_boxes)

            # helper closing over iou_matrix / max_iou_expand; called below
            # with its defaults (gaussian decay, sigma=0.5)
            def get_decay_factor(method='gauss', sigma=0.5):
                if method == 'gauss':
                    # gaussian decay
                    decay_factor = np.exp(-(iou_matrix**2 - max_iou_expand**2) / sigma)
                else:
                    # linear decay
                    decay_factor = (1 - iou_matrix) / (1 - max_iou_expand)

                # decay factor: 1xN
                decay_factor = np.min(decay_factor, axis=0)
                # clamp decay factor to <= 1
                decay_factor = np.minimum(decay_factor, 1.0)
                return decay_factor

            # decay factor for box score
            decay_factor = get_decay_factor()

            # apply decay factor to punish box score,
            # and filter box with confidence threshold
            s_matrix_decay = s_nms * decay_factor
            keep_mask = s_matrix_decay >= confidence

        elif use_spm:
            # apply SPM(Score Penalty Mechanism)
            if use_diou:
                # TODO: Cluster SPM distance NMS couldn't achieve good result, may need to double check
                # currently we fallback to normal SPM
                #
                # Reference:
                # https://github.com/Zzh-tju/CIoU/blob/master/layers/functions/detection.py
                # https://zhuanlan.zhihu.com/p/157900024

                #diou_matrix = box_diou_matrix(b_nms, b_nms)
                #flag = (updated_iou_matrix >= 0).astype(np.float32)
                #penalty_coef = np.prod(np.minimum(np.exp(-(updated_iou_matrix**2)/0.2) + diou_matrix*((updated_iou_matrix>0).astype(np.float32)), flag), axis=0)
                penalty_coef = np.prod(np.exp(-(updated_iou_matrix**2)/0.2), axis=0)
            else:
                penalty_coef = np.prod(np.exp(-(updated_iou_matrix**2)/0.2), axis=0)
            # both branches above currently compute the same penalty
            s_spm = penalty_coef * s_nms
            keep_mask = s_spm >= confidence

        else:
            # keep boxes whose largest overlap with a higher-scored box
            # stays below iou_threshold
            keep_mask = max_iou < iou_threshold

        if use_weighted:
            # generate weights matrix with box score and final IoU matrix
            weights = (updated_iou_matrix*(updated_iou_matrix>iou_threshold).astype(np.float32) + np.eye(num_boxes)) * (s_nms.reshape((1, num_boxes)))

            # convert box format to (xmin,ymin,xmax,ymax) for weighted average,
            # and expand to NxN array
            xmin_expand = np.tile(b_nms[:,0], (num_boxes, 1))  #(num_boxes)x(num_boxes)
            ymin_expand = np.tile(b_nms[:,1], (num_boxes, 1))  #(num_boxes)x(num_boxes)
            xmax_expand = np.tile(b_nms[:,0]+b_nms[:,2], (num_boxes, 1))  #(num_boxes)x(num_boxes)
            ymax_expand = np.tile(b_nms[:,1]+b_nms[:,3], (num_boxes, 1))  #(num_boxes)x(num_boxes)

            # apply weighted average to all the candidate boxes
            weightsum = weights.sum(axis=1)
            xmin_expand = np.true_divide((xmin_expand*weights).sum(axis=1), weightsum)
            ymin_expand = np.true_divide((ymin_expand*weights).sum(axis=1), weightsum)
            xmax_expand = np.true_divide((xmax_expand*weights).sum(axis=1), weightsum)
            ymax_expand = np.true_divide((ymax_expand*weights).sum(axis=1), weightsum)

            # stack the weighted average boxes and convert back to (x,y,w,h)
            b_nms = np.stack([xmin_expand, ymin_expand, xmax_expand-xmin_expand, ymax_expand-ymin_expand], axis=1)

        # keep NMSed boxes
        b_nms = b_nms[keep_mask]
        c_nms = c_nms[keep_mask]
        s_nms = s_nms[keep_mask]

        # merge NMSed boxes to final result
        # (the first non-empty class result seeds the arrays, later ones are appended)
        if len(nboxes) == 0:
            nboxes = np.asarray(copy.deepcopy(b_nms))
            nclasses = np.asarray(copy.deepcopy(c_nms))
            nscores = np.asarray(copy.deepcopy(s_nms))
        else:
            nboxes = np.append(nboxes, copy.deepcopy(b_nms), axis=0)
            nclasses = np.append(nclasses, copy.deepcopy(c_nms), axis=0)
            nscores = np.append(nscores, copy.deepcopy(s_nms), axis=0)

    # reformat result for output: wrap every array in a one-element list
    nboxes = [np.array(nboxes)]
    nclasses = [np.array(nclasses)]
    nscores = [np.array(nscores)]
    return nboxes, nclasses, nscores

def box_iou_matrix(boxes1, boxes2):
    """
    Pairwise IoU between two sets of boxes.

    Both sets of boxes are expected to be in (x, y, w, h) format.
    Reference implementation:
        https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    Arguments:
        boxes1 (np.array[N, 4])
        boxes2 (np.array[M, 4])
    Returns:
        iou (np.array[N, M]): entry [i, j] is the IoU of boxes1[i]
            and boxes2[j]
    """
    # corner points; boxes1 gets an extra axis so it broadcasts against boxes2
    top_left_1 = boxes1[:, None, :2]                 # [N,1,2]
    bottom_right_1 = top_left_1 + boxes1[:, None, 2:]
    top_left_2 = boxes2[:, :2]                       # [M,2]
    bottom_right_2 = top_left_2 + boxes2[:, 2:]

    # overlap rectangle of every pair, clamped at zero size
    overlap_min = np.maximum(top_left_1, top_left_2)            # [N,M,2]
    overlap_max = np.minimum(bottom_right_1, bottom_right_2)    # [N,M,2]
    overlap_area = np.maximum(overlap_max - overlap_min, 0).prod(axis=-1)  # [N,M]

    # box areas are simply w * h
    areas_1 = boxes1[:, 2] * boxes1[:, 3]
    areas_2 = boxes2[:, 2] * boxes2[:, 3]

    union_area = areas_1[:, None] + areas_2 - overlap_area
    return overlap_area / union_area

def box_diou_matrix(boxes1, boxes2):
    """
    Pairwise Distance-IoU (DIoU) between two sets of boxes.

    DIoU is the plain IoU minus the squared distance between the box centers
    divided by the squared diagonal of the smallest box enclosing both.
    Both sets of boxes are expected to be in (x, y, w, h) format.
    Arguments:
        boxes1 (np.array[N, 4])
        boxes2 (np.array[M, 4])
    Returns:
        diou (np.array[N, M]): entry [i, j] is the DIoU of boxes1[i]
            and boxes2[j]
    """
    # boxes1 gets an extra axis so every operation broadcasts to [N,M,2]
    origin_1, size_1 = boxes1[:, None, :2], boxes1[:, None, 2:]
    origin_2, size_2 = boxes2[:, :2], boxes2[:, 2:]

    # squared distance between box centers
    center_offset = (origin_1+size_1/2) - (origin_2+size_2/2)  # [N,M,2]
    center_dist_sq = np.power(center_offset[..., 0], 2) + np.power(center_offset[..., 1], 2)  # [N,M]

    # smallest rectangle enclosing each pair of boxes
    hull_min = np.minimum(origin_1, origin_2)  # [N,M,2]
    hull_max = np.maximum(origin_1+size_1, origin_2+size_2)  # [N,M,2]
    hull_wh = np.maximum(hull_max - hull_min, 0)  # [N,M,2]

    # squared diagonal of the enclosing rectangle
    hull_diag_sq = np.power(hull_wh[..., 0], 2) + np.power(hull_wh[..., 1], 2)  # [N,M]

    # distance penalty; epsilon in the denominator avoids dividing by 0
    distance_penalty = np.true_divide(center_dist_sq, hull_diag_sq + np.finfo(float).eps)

    return box_iou_matrix(boxes1, boxes2) - 1.0 * distance_penalty

def get_anchors(anchors_path):
    '''Read anchor sizes from the first line of a text file.

    The line holds comma separated numbers; consecutive values are paired,
    so the returned array has shape (num_anchors, 2).
    '''
    with open(anchors_path) as anchors_file:
        first_line = anchors_file.readline()
    values = np.array(list(map(float, first_line.split(','))))
    return values.reshape(-1, 2)

In [None]:
def yolo_predict_keras(model, image, anchors, num_classes, model_input_shape, conf_threshold, elim_grid_sense):
    """
    Run one image through a Keras YOLO model and decode its detections.

    Only the YOLOv3 decoding path (yolo3_postprocess_np) is wired up here,
    with post-processing capped at max_boxes=100.

    Parameters:
    - model: object exposing predict(), called with a one-element list
    - image: image exposing .size, which is reversed to obtain the original
      (height, width) - presumably a PIL image; confirm against callers
    - anchors, num_classes, model_input_shape: forwarded to the post-processing
    - conf_threshold: forwarded as the post-processing `confidence`
    - elim_grid_sense: forwarded unchanged to the post-processing

    Returns:
    - (pred_boxes, pred_classes, pred_scores) as produced by yolo3_postprocess_np
    """
    network_input = preprocess_image(image, model_input_shape)
    # origin image shape, in (height, width) format
    original_shape = image.size[::-1]

    raw_output = model.predict([network_input])
    # wrap a single output so a list is always handed to the post-processing
    if type(raw_output) is not list:
        raw_output = [raw_output]

    pred_boxes, pred_classes, pred_scores = yolo3_postprocess_np(
        raw_output,
        original_shape,
        anchors,
        num_classes,
        model_input_shape,
        max_boxes=100,
        confidence=conf_threshold,
        elim_grid_sense=elim_grid_sense,
    )

    return pred_boxes, pred_classes, pred_scores

In [None]:
def plot_images(image_path, annotation_file):
    """
    Plots the ground truth and prediction for a given image, side by side.

    Inference uses the notebook-level `model` variable, which must be
    assigned before this function is called. The anchors file, class count,
    confidence threshold and input shape are fixed inside the function.

    Parameters:
    - image_path (str): path to the image; also used to look up its line in
      the annotation file (substring match)
    - annotation_file (str): path to the annotation file, one image per line:
      "<image path> x_min,y_min,x_max,y_max,class_id ..."

    Returns:
    - None

    Raises:
    - FileNotFoundError: if OpenCV cannot read the image
    """
    # Read the annotation file
    with open(annotation_file, 'r') as f:
        annotations = f.readlines()

    # Check if the image is in the annotations file
    for line in annotations:
        if image_path in line:
            # Extract the ground truth boxes. split() without arguments drops
            # the trailing newline and tolerates repeated or trailing spaces,
            # which split(' ') would turn into bogus empty "boxes".
            ground_truth_boxes = line.split()[1:]
            break
    else:
        print(f"{image_path} not found in {annotation_file}.")
        return

    # Read the image once; cv2.imread returns None instead of raising when
    # the file is missing or unreadable
    original = cv2.imread(image_path)
    if original is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    original = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)
    image = original.copy()  # canvas for the ground truth
    img = original.copy()    # canvas for the predictions

    # Plot the ground truth boxes on the image
    for box in ground_truth_boxes:
        # the last field is the class id, which is not drawn
        xmin, ymin, xmax, ymax = list(map(int, box.split(',')))[:-1]
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)

    # Get the prediction for the image
    pil_image = Image.open(image_path)
    anchors = get_anchors('/content/drive/MyDrive/RabbitHole/keras-YOLOv3-model-set/configs/yolo3_anchors.txt')
    num_classes = 7
    conf_threshold = 0.4
    model_input_shape = (416,416)
    pred_boxes, pred_classes, pred_scores = yolo_predict_keras(model, pil_image, anchors, num_classes, model_input_shape, conf_threshold, elim_grid_sense=False)

    # Plot the predicted boxes on the second copy of the image
    for box in pred_boxes:
        # cv2.rectangle needs plain integer coordinates
        xmin, ymin, xmax, ymax = (int(value) for value in box)
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)

    # Plot the images
    fig, ax = plt.subplots(1, 2, figsize=(15, 10))
    ax[0].imshow(image)
    ax[0].set_title(f"Ground truth ({os.path.basename(image_path)})")
    ax[1].imshow(img)
    ax[1].set_title(f"Prediction ({os.path.basename(image_path)})")
    plt.show()

In [None]:
import tensorflow as tf

In [None]:
# Absolute paths of the frames to visualise with plot_images.
images = [
    "/content/drive/MyDrive/RabbitHole/data/sequence_1/frame_s1_11.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_3/frame_s3_1148.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_3/frame_s3_617.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_3/frame_s3_905.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_3/frame_s3_1105.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_3/frame_s3_581.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_4/frame_s4_195.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_3/frame_s3_401.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_3/frame_s3_48.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_3/frame_s3_193.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_3/frame_s3_146.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_4/frame_s4_55.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_4/frame_s4_116.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_3/frame_s3_709.jpg",
    "/content/drive/MyDrive/RabbitHole/data/sequence_3/frame_s3_205.jpg"
    ]
# Annotation file in which plot_images looks up the line of each image path.
annotation_file = '/content/drive/MyDrive/RabbitHole/data/annotations.txt'

# load model
# plot_images reads `model` from the notebook's global scope, so it has to be
# assigned here before the loop below runs.
# NOTE(review): `format` shadows the Python builtin of the same name for the
# rest of the session and is not used in this cell - consider renaming it.
model, format = load_eval_model('/content/drive/MyDrive/RabbitHole/output/yolo3_mobilenet_lite/yolo3_mobilenet_lite.h5')

# Show ground truth next to the model prediction for every selected frame.
for image_path in images:
  plot_images(image_path, annotation_file)

In [None]:
# Work from the repository root: the evaluation cell below calls eval.py and
# configs/yolo3_anchors.txt with paths relative to this directory.
%cd /content/drive/MyDrive/RabbitHole/keras-YOLOv3-model-set

In [None]:
# Evaluate the trained yolo3_mobilenet_lite model on valid.txt with the
# repository's eval.py: VOC evaluation type, IoU threshold 0.5, confidence
# threshold 0.3, 416x416 model input.
# NOTE(review): --save_result presumably stores the evaluation output on disk;
# confirm the exact behavior and location in eval.py.
!python eval.py \
--model_path=/content/drive/MyDrive/RabbitHole/output/yolo3_mobilenet_lite/yolo3_mobilenet_lite.h5 \
--anchors_path=configs/yolo3_anchors.txt \
--classes_path=/content/drive/MyDrive/RabbitHole/data/classes.txt \
--model_input_shape=416x416 \
--eval_type=VOC \
--iou_threshold=0.5 \
--conf_threshold=0.3 \
--annotation_file=/content/drive/MyDrive/RabbitHole/data/valid.txt \
--save_result

# LISTA DE AFAZERES



*   Melhorar o código, porque pelo amor de Deus
*   Colocar o código do Cleber
*   Fazer os dois funcionarem juntos.
*   Fazer a câmera lá funcionar com essa IA
*   Fazer um Docker para receber a imagem e polir ela com CUDA
*   Servidor (uhuul)
