In [1]:
import cv2
import argparse
import numpy as np
import utils
import os 
import time
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array
import gc
import pandas as pd

In [2]:
%%time
utils.extract_frames("/data/TrainSet/Drone1/Morning/1.1.1.mov", out_dir='/data/out', save=True, skip=30)

Frames saved to `/data/out/` directory.
CPU times: user 3min 35s, sys: 5.48 s, total: 3min 41s
Wall time: 2min 31s


In [2]:
def load_labels(path):
    """ Load labels from path, removes useless boxes and keep only usefull columns
    """
    labels = pd.read_csv(path)
    labels = labels[labels['lost'] != 1]
    labels = labels[labels['occluded'] != 1]
    labels = labels[['frame','video']]
    labels['class'] = 0
    return labels.drop_duplicates()

num_video = '1.1.1'
labels_path = '/data/TrainSet/Labels/SingleActionLabels/train_labels.csv'
labels = load_labels(labels_path)
labels = labels[labels['video'] == num_video]
np_labels = labels.to_numpy()
label_max = np.amax(np_labels[:,0])

for frame in range(label_max):
    if frame not in np_labels[:,0]:
        np_labels = np.vstack((np_labels, [frame, num_video, -1]))
        np_labels[-1][0] = int(np_labels[-1][0])
        np_labels[-1][2] = int(np_labels[-1][2])

In [3]:
%%time
# read input image
list_images = list()
list_images = utils.get_files_by_ext("/data/out/", "jpg")
print("We got ", len(list_images), " images")

We got  75  images
CPU times: user 3.25 ms, sys: 1.09 ms, total: 4.34 ms
Wall time: 34.5 ms


In [4]:
%%time
classes_file = "object-detection-opencv/yolov3.txt"
weights_file = "object-detection-opencv/yolov3.weights"
conf_file = "object-detection-opencv/yolov3.cfg"

conf_threshold = 0.5
nms_threshold = 0.4

# for image_path in list_images:
image = cv2.imread(list_images[0])

Width = image.shape[1]
Height = image.shape[0]

scale = 0.00392

# read class names from text file
classes = None
with open(classes_file, 'r') as f:
    classes = [line.strip() for line in f.readlines()]

# generate different colors for different classes 
COLORS = np.random.uniform(0, 255, size=(len(classes), 3))

# read pre-trained model and config file
# reads the weights and config file and creates the network.
net = cv2.dnn.readNet(weights_file, conf_file)
# model_path = "object-detection-opencv/yolov3-tiny.h5"
nb_out_layer = 3
# model = tf.keras.models.load_model(model_path)

CPU times: user 135 ms, sys: 311 ms, total: 446 ms
Wall time: 2.08 s


In [5]:
%%time
batch_size = 10
batch_images = list()
total_elapsed_time_detection = 0 
total_elapsed_time_compute_metrics = 0
true_positive = 0 # Good prediction + Prediction said Poacher
true_negative = 0 # Good prediction + Prediction said no Poacher
false_positive = 0 # False prediction + Prediction said Poacher 
false_negative = 0 # False prediction + Prediction said no Poacher

for i in range(len(list_images)):
# for i in range(0, 10):
    batch_images.append(list_images[i])
    if i == len(list_images)-1 or len(batch_images)%batch_size==0:      
        print("------ Start Analyzing from ", i, " ------")
        time_min_blobing = time.time()
        # create input blob 
        # prepares the input image to run through the deep neural network
        blob = utils.image_to_matrix(batch_images,resize_shape=(224,224))
#         blob = blob.transpose((0,2,3,1))
        
        # set input blob for the network
        net.setInput(blob)
        time_max_blobing = time.time()
        elapsed_time_blobing = time_max_blobing - time_min_blobing
        print("The blobing phase took ", round(elapsed_time_blobing,2), "s to be executed")    
        
        time_min_detection = time.time()
        # run inference through the network
        # and gather predictions from output layers
        outs = net.forward(utils.get_output_layers(net))
#         outs = model.predict(blob)

        # identify objects into the list of images analyzed by the network
        dict_obj_detected = {}
        dict_obj_detected = utils.detect_object(outs, batch_images, Width, Height, nb_out_layer)

        # identify pictures with at least one perso
        dict_danger = utils.detect_danger(dict_obj_detected)
#         for image_name, is_dangerous in dict_danger.items():
#             print("Is", image_name, "dangerous ? \t\t\t" , is_dangerous)
        time_max_detection = time.time()
        elapsed_time_detection = time_max_detection - time_min_detection
        total_elapsed_time_detection += elapsed_time_detection
        print("The detection took ", round(elapsed_time_detection,2), "s to be executed")
        
        #  Compute Metrics
        time_min_compute_metrics = time.time()
        for image_name, is_dangerous in dict_danger.items():
            video_number, frame_number = utils.get_video_frame_number(image_name)
            index_tuple = np.where((np_labels[:,0] == frame_number) & (np_labels[:,1] == video_number))
            index = index_tuple[0][0]
            y_true = np_labels[index,2]
            if ((y_true == 0) & (is_dangerous)): # Get true_positive
                true_positive += 1
            elif ((y_true != 0) & (is_dangerous == False)): # Get true_negative
                true_negative += 1
            elif ((y_true != 0) & (is_dangerous)): # Get false_positive
                false_positive += 1
            elif ((y_true == 0) & (is_dangerous == False)): # Get false_negative
                false_negative += 1     
        time_max_compute_metrics = time.time()
        elapsed_time_compute_metrics = time_max_compute_metrics - time_min_compute_metrics
        total_elapsed_time_compute_metrics += elapsed_time_compute_metrics
        print("The metrics computation took ", round(elapsed_time_compute_metrics,2), "s to be executed")
        
        
        time_min_draw_bbox = time.time()
        # Draw bounding boxes and save images
        for image_path, image_items in dict_obj_detected.items():
            utils.get_bounding_box(image_path, image_items, \
                                   classes, COLORS, conf_threshold, nms_threshold
                                  )
        time_max_draw_bbox = time.time()
        elapsed_time_draw_bbox = time_max_draw_bbox - time_min_detection
        print("The draw of the bounding boxes and the image save took ", round(elapsed_time_draw_bbox,2), "s to be executed")         
        print("---------------------------------------------------------------------")
        
        batch_images = list()

print("---------------------------------------------------------------------")
print("---------------------------------------------------------------------")
print("It took ", round(total_elapsed_time_detection, 2), "s to do all the detections")
# print("It took ", round(total_elapsed_time_compute_metrics, 2), "s to compute all the metrics")
print("---------------------------------------------------------------------")
print("---------------------------------------------------------------------")


print("true_positive = ", true_positive)
print("true_negative = ", true_negative)
print("false_positive = ", false_positive)
print("false_negative = ", false_negative)

------ Start Analyzing from  9  ------
The blobing phase took  1.38 s to be executed
The detection took  4.59 s to be executed
The metrics computation took  0.0 s to be executed
The draw of the bounding boxes and the image save took  10.64 s to be executed
---------------------------------------------------------------------
------ Start Analyzing from  19  ------
The blobing phase took  1.64 s to be executed
The detection took  3.93 s to be executed
The metrics computation took  0.0 s to be executed
The draw of the bounding boxes and the image save took  9.91 s to be executed
---------------------------------------------------------------------
------ Start Analyzing from  29  ------
The blobing phase took  1.55 s to be executed
The detection took  4.41 s to be executed
The metrics computation took  0.0 s to be executed
The draw of the bounding boxes and the image save took  10.21 s to be executed
---------------------------------------------------------------------
------ Start Analy

In [6]:
total_pred = len(list_images)
total_pred_pos = true_positive + false_positive
total_real_pos = true_positive + false_negative
accuracy = (true_positive + true_negative) / (total_pred) *100
precision = (true_positive / total_pred_pos)*100 # Pertinence des alertes
recall = (true_positive / total_real_pos)*100 # 

print("accuracy = ", round(accuracy,2), "%")
print("precision = " , round(precision,2), "%")
print("recall = ", round(recall,2), "%")

accuracy =  61.33 %
precision =  100.0 %
recall =  61.33 %


In [7]:
utils.convert_frames_to_video("/data/out/output_with_bounding_box/", \
                              "/data/out/output_video/1.1.1_withBB.mp4", \
                              24, (Width,Height))

100%|██████████| 75/75 [00:22<00:00,  3.30it/s]


In [15]:
np_labels

array([[0, '2.2.4', 0],
       [1, '2.2.4', 0],
       [2, '2.2.4', 0],
       ...,
       [1247, '2.2.4', -1],
       [1248, '2.2.4', -1],
       [1249, '2.2.4', -1]], dtype=object)