In [1]:
import cv2
import argparse
import numpy as np
import utils
import os 
import time
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array
import gc
import pandas as pd

In [2]:
print('tensorflow version:', tf.__version__)

physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

tf.per_process_gpu_memory_fraction = 0.3

tensorflow version: 2.2.0-rc2


In [3]:
%%time
# utils.extract_frames("/data/TrainSet/Drone1/Morning/1.1.1.mov", out_dir='/data/out', save=True, skip=30)
utils.extract_frames("/data/archives/1.1.11.MP4", out_dir='/data/out', save=True, skip=30)

Frames saved to `/data/out/` directory.
CPU times: user 60 s, sys: 637 ms, total: 1min
Wall time: 20.1 s


In [4]:
gc.collect()

22

In [5]:
def load_labels(path):
    """ Load labels from path, removes useless boxes and keep only usefull columns
    """
    labels = pd.read_csv(path)
    labels = labels[labels['lost'] != 1]
    labels = labels[labels['occluded'] != 1]
    labels = labels[['frame','video']]
    labels['class'] = 0
    return labels.drop_duplicates()

num_video = '1.1.11'
# labels_path = '/data/TrainSet/Labels/SingleActionLabels/train_labels.csv'
labels_path = '/data/train_labels.csv'
labels = load_labels(labels_path)
labels = labels[labels['video'] == num_video]
np_labels = labels.to_numpy()
label_max = np.amax(np_labels[:,0])

for frame in range(label_max):
    if frame not in np_labels[:,0]:
        np_labels = np.vstack((np_labels, [frame, num_video, -1]))
        np_labels[-1][0] = int(np_labels[-1][0])
        np_labels[-1][2] = int(np_labels[-1][2])

In [6]:
%%time
# read input image
list_images = list()
list_images = utils.get_files_by_ext("/data/out/", "jpg")
print("We got ", len(list_images), " images")

We got  21  images
CPU times: user 2.87 ms, sys: 0 ns, total: 2.87 ms
Wall time: 1.62 ms


In [7]:
%%time
classes_file = "object-detection-opencv/yolov3.txt"
weights_file = "object-detection-opencv/yolov3.weights"
conf_file = "object-detection-opencv/yolov3.cfg"

conf_threshold = 0.5
nms_threshold = 0.4

# for image_path in list_images:
image = cv2.imread(list_images[0])

Width = image.shape[1]
Height = image.shape[0]

scale = 0.00392

# read class names from text file
classes = None
with open(classes_file, 'r') as f:
    classes = [line.strip() for line in f.readlines()]

# generate different colors for different classes 
COLORS = np.random.uniform(0, 255, size=(len(classes), 3))

# read pre-trained model and config file
# reads the weights and config file and creates the network.
# net = cv2.dnn.readNet(weights_file, conf_file)
model_path = "/data/model/yolov3.h5"
nb_out_layer = 3
model = tf.keras.models.load_model(model_path)

CPU times: user 2min 36s, sys: 838 ms, total: 2min 37s
Wall time: 2min 38s


In [8]:
model.output

[<tf.Tensor 'tf_op_layer_concat_4/Identity:0' shape=(None, None, None, 3, None) dtype=float32>,
 <tf.Tensor 'tf_op_layer_concat_7/Identity:0' shape=(None, None, None, 3, None) dtype=float32>,
 <tf.Tensor 'tf_op_layer_concat_10/Identity:0' shape=(None, None, None, 3, None) dtype=float32>]

In [20]:

def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):

    valid_scale=[0, np.inf]
    pred_bbox = np.array(pred_bbox)

    pred_xywh = pred_bbox[:, 0:4]
    pred_conf = pred_bbox[:, 4]
    pred_prob = pred_bbox[:, 5:]

    # # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
    pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
                                pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)
    # # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
    org_h, org_w = org_img_shape
    resize_ratio = min(input_size / org_w, input_size / org_h)

    dw = (input_size - resize_ratio * org_w) / 2
    dh = (input_size - resize_ratio * org_h) / 2

    pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
    pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio

    # # (3) clip some boxes those are out of range
    pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
                                np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
    invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
    pred_coor[invalid_mask] = 0

    # # (4) discard some invalid boxes
    bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
    scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))

    # # (5) discard some boxes with low scores
    classes = np.argmax(pred_prob, axis=-1)
    scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
    score_mask = scores > score_threshold
    mask = np.logical_and(scale_mask, score_mask)
    coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]

    return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)

In [21]:
def detect_object(outs, list_images, Width, Height, nb_out_layer):    
    i = 0
    dict_obj_detected = {}

    # for each detetion from each output layer 
    # get the confidence, class id, bounding box params
    # and ignore weak detections (confidence < 0.5)
    for out in outs:
#         #Dimension1 = Number of Images
#         #Dimension2 = X_out_grid * Y_out_grid * nb_out_layer
#         #Dimension3 = 5 + nb_classes
        out = out.reshape(out.shape[0],\
                          out.shape[1]*out.shape[2]*nb_out_layer,\
                          int(out.shape[4])
                         )
        
        for image in out:
            image_name = list_images[i]
            if not image_name in dict_obj_detected:
                dict_obj_detected[image_name] = {}
                dict_obj_detected[image_name]["class_ids"] = list()
                dict_obj_detected[image_name]["confidences"] = list()
                dict_obj_detected[image_name]["boxes"] = list()
            for detection in image:
                
                print(type(confidence))
                lol()
                
                confidence = detection[4]
                
                scores = detection[5:]
                class_id = np.argmax(scores)
#                 confidence = scores[class_id]
                if confidence > 0.2 and class_id == 0:
#                     center_x = int(detection[0] * Width)
#                     center_y = int(detection[1] * Height)
#                     w = int(detection[2] * Width)
#                     h = int(detection[3] * Height)
#                     x = center_x - w / 2
#                     y = center_y - h / 2
                    
                    x = detection[0] * (Width / 416)
                    y = detection[1] * (Height / 416)
                    w = detection[2] * (Width / 416)
                    h = detection[3] * (Height / 416)
                    
                    dict_obj_detected[image_name]["class_ids"].append(class_id)
                    dict_obj_detected[image_name]["confidences"].append(float(confidence))
                    dict_obj_detected[image_name]["boxes"].append([x, y, w, h])
            i += 1
        i = 0 

    return dict_obj_detected

In [22]:
gc.collect()

0

In [23]:
%%time
batch_size = 4
batch_images = list()
total_elapsed_time_detection = 0 
total_elapsed_time_compute_metrics = 0
true_positive = 0 # Good prediction + Prediction said Poacher
true_negative = 0 # Good prediction + Prediction said no Poacher
false_positive = 0 # False prediction + Prediction said Poacher 
false_negative = 0 # False prediction + Prediction said no Poacher

for i in range(len(list_images)):
# for i in range(0, 10):
    batch_images.append(list_images[i])
    if i == len(list_images)-1 or len(batch_images)%batch_size==0:      
        print("------ Start Analyzing from ", i, " ------")
        time_min_blobing = time.time()
        # create input blob 
        # prepares the input image to run through the deep neural network
        blob = utils.image_to_matrix(batch_images,resize_shape=(416,416))
        blob = blob.transpose((0,2,3,1))
        
        # set input blob for the network
#         net.setInput(blob)
        time_max_blobing = time.time()
        elapsed_time_blobing = time_max_blobing - time_min_blobing
        print("The blobing phase took ", round(elapsed_time_blobing,2), "s to be executed")    
        
        time_min_detection = time.time()
        # run inference through the network
        # and gather predictions from output layers
#         outs = net.forward(utils.get_output_layers(net))
        outs = model.predict(blob)

        # identify objects into the list of images analyzed by the network
        dict_obj_detected = {}
        dict_obj_detected = detect_object(outs, batch_images, Width, Height, nb_out_layer)

        # identify pictures with at least one perso
        dict_danger = utils.detect_danger(dict_obj_detected)
#         for image_name, is_dangerous in dict_danger.items():
#             print("Is", image_name, "dangerous ? \t\t\t" , is_dangerous)
        time_max_detection = time.time()
        elapsed_time_detection = time_max_detection - time_min_detection
        total_elapsed_time_detection += elapsed_time_detection
        print("The detection took ", round(elapsed_time_detection,2), "s to be executed")
        
        #  Compute Metrics
        time_min_compute_metrics = time.time()
        for image_name, is_dangerous in dict_danger.items():
            video_number, frame_number = utils.get_video_frame_number(image_name)
            index_tuple = np.where((np_labels[:,0] == frame_number) & (np_labels[:,1] == video_number))
            index = index_tuple[0][0]
            y_true = np_labels[index,2]
            if ((y_true == 0) & (is_dangerous)): # Get true_positive
                true_positive += 1
            elif ((y_true != 0) & (is_dangerous == False)): # Get true_negative
                true_negative += 1
            elif ((y_true != 0) & (is_dangerous)): # Get false_positive
                false_positive += 1
            elif ((y_true == 0) & (is_dangerous == False)): # Get false_negative
                false_negative += 1     
        time_max_compute_metrics = time.time()
        elapsed_time_compute_metrics = time_max_compute_metrics - time_min_compute_metrics
        total_elapsed_time_compute_metrics += elapsed_time_compute_metrics
        print("The metrics computation took ", round(elapsed_time_compute_metrics,2), "s to be executed")
        
        
        time_min_draw_bbox = time.time()
        # Draw bounding boxes and save images
        for image_path, image_items in dict_obj_detected.items():
            utils.get_bounding_box(image_path, image_items, \
                                   classes, COLORS, conf_threshold, nms_threshold
                                  )
        time_max_draw_bbox = time.time()
        elapsed_time_draw_bbox = time_max_draw_bbox - time_min_detection
        print("The draw of the bounding boxes and the image save took ", round(elapsed_time_draw_bbox,2), "s to be executed")         
        print("---------------------------------------------------------------------")
        
        batch_images = list()
    gc.collect()

print("---------------------------------------------------------------------")
print("---------------------------------------------------------------------")
print("It took ", round(total_elapsed_time_detection, 2), "s to do all the detections")
# print("It took ", round(total_elapsed_time_compute_metrics, 2), "s to compute all the metrics")
print("---------------------------------------------------------------------")
print("---------------------------------------------------------------------")


print("true_positive = ", true_positive)
print("true_negative = ", true_negative)
print("false_positive = ", false_positive)
print("false_negative = ", false_negative)

------ Start Analyzing from  3  ------
The blobing phase took  0.61 s to be executed


UnboundLocalError: local variable 'confidence' referenced before assignment

In [13]:
for idx, val in dict_obj_detected.items():
    print(len(val['class_ids']))
    print((val['boxes']))    

0
[]


In [11]:
total_pred = len(list_images)
total_pred_pos = true_positive + false_positive
total_real_pos = true_positive + false_negative
accuracy = (true_positive + true_negative) / (total_pred) *100
precision = (true_positive / total_pred_pos)*100 # Pertinence des alertes
recall = (true_positive / total_real_pos)*100 # 

print("accuracy = ", round(accuracy,2), "%")
print("precision = " , round(precision,2), "%")
print("recall = ", round(recall,2), "%")

accuracy =  100.0 %
precision =  100.0 %
recall =  100.0 %


In [15]:
utils.convert_frames_to_video("/data/out/output_with_bounding_box/", \
                              "/data/out/output_video/1.1.1_withBB.mp4", \
                              5, (Width,Height))

100%|██████████| 21/21 [00:07<00:00,  2.69it/s]


In [13]:
np_labels

array([[0, '1.1.11', 0],
       [1, '1.1.11', 0],
       [2, '1.1.11', 0],
       ...,
       [601, '1.1.11', 0],
       [602, '1.1.11', 0],
       [603, '1.1.11', 0]], dtype=object)