In [46]:
# imports
import numpy as np
import pandas as pd
import glob
import cv2 # for capturing videos
import pickle # to save the model
import argparse
import os
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as pl
from PIL import Image
from tqdm import tqdm

# Managing Objects Detection Results
Load YOLOv7 result and transform it to be use as an input of submovement detection model

In [47]:
# CONSTANTS
NB_IMG = 28854 # number of frames used with YOLOv7
PATH = "../data/YOLOv7/labels/"
TXT_FILES = glob.glob(PATH + "*.txt")
# even though the images used with YOLOv7 were 640x640, we resize them to 300x300 to stay consistent with the hands dataset
IMG_HEIGHT = 300 
IMG_WIDTH = 300

In [48]:
# load labels
labels = ["0" if txt_path not in TXT_FILES else open(txt_path, "r").read() for txt_path in tqdm([PATH + "frames" + str(i).zfill(6) + ".txt" for i in range(NB_IMG)])]

100%|██████████| 28854/28854 [00:34<00:00, 843.59it/s] 


In [49]:
# creat a numpy array with the labels
labels = np.array(labels)

In [50]:
# structuring the data
# add and ID column to the labels
labels = np.hstack((np.arange(NB_IMG).reshape(-1, 1), labels.reshape(-1, 1)))
print(labels.shape)
print(labels[8888:8899])


(28854, 2)
[['8888' '0 0.620313 0.560156 0.75625 0.435937 0.950684\n']
 ['8889' '0 0.620313 0.563281 0.75625 0.429688 0.947754\n']
 ['8890' '0 0.620313 0.564062 0.75625 0.428125 0.945801\n']
 ['8891' '0 0.621094 0.565625 0.754687 0.425 0.947266\n']
 ['8892'
  '64 0.553906 0.635938 0.104687 0.046875 0.818359\n0 0.621875 0.56875 0.75625 0.41875 0.944824\n']
 ['8893' '0 0.621875 0.569531 0.753125 0.417188 0.945312\n']
 ['8894' '0 0.621094 0.572656 0.754687 0.414062 0.949707\n']
 ['8895' '0 0.621094 0.575781 0.754687 0.410937 0.94873\n']
 ['8896' '0 0.620313 0.579687 0.75625 0.409375 0.94873\n']
 ['8897' '0 0.619531 0.58125 0.754687 0.40625 0.944336\n']
 ['8898' '0 0.619531 0.582812 0.757812 0.4 0.944824\n']]


In [51]:
# cannot directly replace every "\n" character by a space because strings are immutable in numpy arrays
# so we have to create a new array

# Create a new column with the modified strings
new_column = np.char.replace(labels[:,1], "\n", " ")

# Stack the new column with the original array
np_ready_to_split = np.column_stack((labels[:,0], new_column))

In [52]:
print(np_ready_to_split[8888:8899])

[['8888' '0 0.620313 0.560156 0.75625 0.435937 0.950684 ']
 ['8889' '0 0.620313 0.563281 0.75625 0.429688 0.947754 ']
 ['8890' '0 0.620313 0.564062 0.75625 0.428125 0.945801 ']
 ['8891' '0 0.621094 0.565625 0.754687 0.425 0.947266 ']
 ['8892'
  '64 0.553906 0.635938 0.104687 0.046875 0.818359 0 0.621875 0.56875 0.75625 0.41875 0.944824 ']
 ['8893' '0 0.621875 0.569531 0.753125 0.417188 0.945312 ']
 ['8894' '0 0.621094 0.572656 0.754687 0.414062 0.949707 ']
 ['8895' '0 0.621094 0.575781 0.754687 0.410937 0.94873 ']
 ['8896' '0 0.620313 0.579687 0.75625 0.409375 0.94873 ']
 ['8897' '0 0.619531 0.58125 0.754687 0.40625 0.944336 ']
 ['8898' '0 0.619531 0.582812 0.757812 0.4 0.944824 ']]


In [53]:
# split second column (column with object detected values) by space
new_list = []

for i in range(len(np_ready_to_split)):
    split_values = np_ready_to_split[i][1].split(" ")
    new_list.append(split_values)

np_splitted = np.array(new_list)

  np_splitted = np.array(new_list)


In [54]:
print(np_splitted.shape)

(28854,)


In [55]:
print(np_splitted[8888:8899])

[list(['0', '0.620313', '0.560156', '0.75625', '0.435937', '0.950684', ''])
 list(['0', '0.620313', '0.563281', '0.75625', '0.429688', '0.947754', ''])
 list(['0', '0.620313', '0.564062', '0.75625', '0.428125', '0.945801', ''])
 list(['0', '0.621094', '0.565625', '0.754687', '0.425', '0.947266', ''])
 list(['64', '0.553906', '0.635938', '0.104687', '0.046875', '0.818359', '0', '0.621875', '0.56875', '0.75625', '0.41875', '0.944824', ''])
 list(['0', '0.621875', '0.569531', '0.753125', '0.417188', '0.945312', ''])
 list(['0', '0.621094', '0.572656', '0.754687', '0.414062', '0.949707', ''])
 list(['0', '0.621094', '0.575781', '0.754687', '0.410937', '0.94873', ''])
 list(['0', '0.620313', '0.579687', '0.75625', '0.409375', '0.94873', ''])
 list(['0', '0.619531', '0.58125', '0.754687', '0.40625', '0.944336', ''])
 list(['0', '0.619531', '0.582812', '0.757812', '0.4', '0.944824', ''])]


In [56]:
# group splitted values by 6 (6 values per object detected) --> (class, x, y, w, h, confidence)
# some frame have no object detected, so we have to add a condition to avoid an error
# some other frames have more than one object detected, so we have to create a list of lists to store all the objects detected in one frame
new_list = []
for i in range(len(np_splitted)):
    if (len(np_splitted[i])-1) % 6 == 0:
        new_list.append([np_splitted[i][j:j+6] for j in range(0, len(np_splitted[i]), 6)])
    else:
        new_list.append(["-1", "-1", "-1", "-1", "-1", "-1"])

np_grouped = np.array(new_list)

  np_grouped = np.array(new_list)


In [57]:
print(np_grouped.shape)
print(np_grouped[8888:8899])

(28854,)
[list([['0', '0.620313', '0.560156', '0.75625', '0.435937', '0.950684'], ['']])
 list([['0', '0.620313', '0.563281', '0.75625', '0.429688', '0.947754'], ['']])
 list([['0', '0.620313', '0.564062', '0.75625', '0.428125', '0.945801'], ['']])
 list([['0', '0.621094', '0.565625', '0.754687', '0.425', '0.947266'], ['']])
 list([['64', '0.553906', '0.635938', '0.104687', '0.046875', '0.818359'], ['0', '0.621875', '0.56875', '0.75625', '0.41875', '0.944824'], ['']])
 list([['0', '0.621875', '0.569531', '0.753125', '0.417188', '0.945312'], ['']])
 list([['0', '0.621094', '0.572656', '0.754687', '0.414062', '0.949707'], ['']])
 list([['0', '0.621094', '0.575781', '0.754687', '0.410937', '0.94873'], ['']])
 list([['0', '0.620313', '0.579687', '0.75625', '0.409375', '0.94873'], ['']])
 list([['0', '0.619531', '0.58125', '0.754687', '0.40625', '0.944336'], ['']])
 list([['0', '0.619531', '0.582812', '0.757812', '0.4', '0.944824'], ['']])]


In [58]:
# delete void elements in list of lists (it's an error introduced by the previous step)
new_list = []
for i in range(len(np_grouped)):
    if np_grouped[i][0] != ['0']:
        for j in range(len(np_grouped[i])):
            if np_grouped[i][j] == ['']:
                    np_grouped[i].pop(j)

    new_list.append(np_grouped[i])
    
np_cleaned = np.array(new_list)

  np_cleaned = np.array(new_list)


In [59]:
print(np_cleaned.shape)
print(np_cleaned[8888:8899])

(28854,)
[list([['0', '0.620313', '0.560156', '0.75625', '0.435937', '0.950684']])
 list([['0', '0.620313', '0.563281', '0.75625', '0.429688', '0.947754']])
 list([['0', '0.620313', '0.564062', '0.75625', '0.428125', '0.945801']])
 list([['0', '0.621094', '0.565625', '0.754687', '0.425', '0.947266']])
 list([['64', '0.553906', '0.635938', '0.104687', '0.046875', '0.818359'], ['0', '0.621875', '0.56875', '0.75625', '0.41875', '0.944824']])
 list([['0', '0.621875', '0.569531', '0.753125', '0.417188', '0.945312']])
 list([['0', '0.621094', '0.572656', '0.754687', '0.414062', '0.949707']])
 list([['0', '0.621094', '0.575781', '0.754687', '0.410937', '0.94873']])
 list([['0', '0.620313', '0.579687', '0.75625', '0.409375', '0.94873']])
 list([['0', '0.619531', '0.58125', '0.754687', '0.40625', '0.944336']])
 list([['0', '0.619531', '0.582812', '0.757812', '0.4', '0.944824']])]


In [60]:
# all the objects YOLOv7 can detect
object_classes = [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
         'hair drier', 'toothbrush' ]

# create a dictionary with the names and the corresponding number
names_dict = {object_classes[i]:i for i in range(len(object_classes))}
print(names_dict)

{'person': 0, 'bicycle': 1, 'car': 2, 'motorcycle': 3, 'airplane': 4, 'bus': 5, 'train': 6, 'truck': 7, 'boat': 8, 'traffic light': 9, 'fire hydrant': 10, 'stop sign': 11, 'parking meter': 12, 'bench': 13, 'bird': 14, 'cat': 15, 'dog': 16, 'horse': 17, 'sheep': 18, 'cow': 19, 'elephant': 20, 'bear': 21, 'zebra': 22, 'giraffe': 23, 'backpack': 24, 'umbrella': 25, 'handbag': 26, 'tie': 27, 'suitcase': 28, 'frisbee': 29, 'skis': 30, 'snowboard': 31, 'sports ball': 32, 'kite': 33, 'baseball bat': 34, 'baseball glove': 35, 'skateboard': 36, 'surfboard': 37, 'tennis racket': 38, 'bottle': 39, 'wine glass': 40, 'cup': 41, 'fork': 42, 'knife': 43, 'spoon': 44, 'bowl': 45, 'banana': 46, 'apple': 47, 'sandwich': 48, 'orange': 49, 'broccoli': 50, 'carrot': 51, 'hot dog': 52, 'pizza': 53, 'donut': 54, 'cake': 55, 'chair': 56, 'couch': 57, 'potted plant': 58, 'bed': 59, 'dining table': 60, 'toilet': 61, 'tv': 62, 'laptop': 63, 'mouse': 64, 'remote': 65, 'keyboard': 66, 'cell phone': 67, 'microwave'

In [61]:
# invert the dictionary to get the names from the numbers
obj_class_dict = {i:object_classes[i] for i in range(len(object_classes))}
print(obj_class_dict)

{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microw

In [62]:
print(obj_class_dict.keys())

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79])


In [63]:
# create a numpy array based on obj_class_dict and add a column set to 0 to count the number of objects detected
np_obj_class_dict = np.array([[i, obj_class_dict[i], 0] for i in range(len(obj_class_dict))])
print(np_obj_class_dict)

[['0' 'person' '0']
 ['1' 'bicycle' '0']
 ['2' 'car' '0']
 ['3' 'motorcycle' '0']
 ['4' 'airplane' '0']
 ['5' 'bus' '0']
 ['6' 'train' '0']
 ['7' 'truck' '0']
 ['8' 'boat' '0']
 ['9' 'traffic light' '0']
 ['10' 'fire hydrant' '0']
 ['11' 'stop sign' '0']
 ['12' 'parking meter' '0']
 ['13' 'bench' '0']
 ['14' 'bird' '0']
 ['15' 'cat' '0']
 ['16' 'dog' '0']
 ['17' 'horse' '0']
 ['18' 'sheep' '0']
 ['19' 'cow' '0']
 ['20' 'elephant' '0']
 ['21' 'bear' '0']
 ['22' 'zebra' '0']
 ['23' 'giraffe' '0']
 ['24' 'backpack' '0']
 ['25' 'umbrella' '0']
 ['26' 'handbag' '0']
 ['27' 'tie' '0']
 ['28' 'suitcase' '0']
 ['29' 'frisbee' '0']
 ['30' 'skis' '0']
 ['31' 'snowboard' '0']
 ['32' 'sports ball' '0']
 ['33' 'kite' '0']
 ['34' 'baseball bat' '0']
 ['35' 'baseball glove' '0']
 ['36' 'skateboard' '0']
 ['37' 'surfboard' '0']
 ['38' 'tennis racket' '0']
 ['39' 'bottle' '0']
 ['40' 'wine glass' '0']
 ['41' 'cup' '0']
 ['42' 'fork' '0']
 ['43' 'knife' '0']
 ['44' 'spoon' '0']
 ['45' 'bowl' '0']
 ['46'

In [64]:
# get through the list of lists and count the number of objects detected for each class
for i in tqdm(range(len(np_cleaned))):
    for j in range(len(np_cleaned[i])):
        for k in range(len(np_cleaned[i][j])):
            for key in obj_class_dict.keys():
                if np_cleaned[i][j][k] == str(key):
                    # if there is a match, increment the counter
                    np_obj_class_dict[key][2] = int(np_obj_class_dict[key][2]) + 1 

100%|██████████| 28854/28854 [00:11<00:00, 2467.44it/s]


In [65]:
print(np_obj_class_dict)

[['0' 'person' '26212']
 ['1' 'bicycle' '0']
 ['2' 'car' '0']
 ['3' 'motorcycle' '0']
 ['4' 'airplane' '0']
 ['5' 'bus' '0']
 ['6' 'train' '0']
 ['7' 'truck' '0']
 ['8' 'boat' '0']
 ['9' 'traffic light' '0']
 ['10' 'fire hydrant' '0']
 ['11' 'stop sign' '0']
 ['12' 'parking meter' '0']
 ['13' 'bench' '0']
 ['14' 'bird' '0']
 ['15' 'cat' '0']
 ['16' 'dog' '0']
 ['17' 'horse' '0']
 ['18' 'sheep' '0']
 ['19' 'cow' '0']
 ['20' 'elephant' '0']
 ['21' 'bear' '0']
 ['22' 'zebra' '0']
 ['23' 'giraffe' '0']
 ['24' 'backpack' '0']
 ['25' 'umbrella' '0']
 ['26' 'handbag' '0']
 ['27' 'tie' '0']
 ['28' 'suitcase' '0']
 ['29' 'frisbee' '4']
 ['30' 'skis' '0']
 ['31' 'snowboard' '0']
 ['32' 'sports ball' '1']
 ['33' 'kite' '0']
 ['34' 'baseball bat' '0']
 ['35' 'baseball glove' '0']
 ['36' 'skateboard' '0']
 ['37' 'surfboard' '2']
 ['38' 'tennis racket' '0']
 ['39' 'bottle' '8972']
 ['40' 'wine glass' '0']
 ['41' 'cup' '7']
 ['42' 'fork' '0']
 ['43' 'knife' '4686']
 ['44' 'spoon' '1']
 ['45' 'bowl' '

The objects which the user really have an interraction with, in the salad prep video are :
- refrigerator
- knife
- bowl
- shelf
- lettuce
- cucumber
- tomatoes
- bottle of soap
- plates
- sink
- cloth
- peeler (to peel the cucumber)
- cutting boards

Problem : not all the listed object can be detected (or correctly classified) by YOLOv7

The class name I decided to focus on are : 
- bottle        --> ['39' 'bottle' '8972']
- knife         --> ['43' 'knife' '4686']
- bowl          --> ['45' 'bowl' '8542']
- banana        --> ['46' 'banana' '218'] (there is no babana in the video but the peeled cucumber has been classify as a banana)
- brocoli       --> ['50' 'broccoli' '522'] (same logic as the banana, except here it is the lettuce which has been classify has a brocoli)
- refrigerator  --> ['72' 'refrigerator' '1796']

In [66]:
# get total number of important objects detected (the ones we want to keep : bottle, knife, bowl, banana, brocoli and refrigerator)
obj_to_keep = [39, 43, 45, 46, 50, 72]
total_obj = 0

for i in tqdm(range(len(np_cleaned))):
    for j in range(len(np_cleaned[i])):
        for k in range(len(np_cleaned[i][j])):
            for key in obj_to_keep:
                if np_cleaned[i][j][k] == str(key):
                    total_obj += 1

100%|██████████| 28854/28854 [00:01<00:00, 17215.27it/s]


In [67]:
print(total_obj)

24736


In [68]:
# create a new list with only the objects we want to keep (bottle, knife, bowl, banana, brocoli and refrigerator) with the structure (frame, object detected)

new_list = []
for i in tqdm(range(len(np_cleaned))):
    for j in range(len(np_cleaned[i])):
        for k in range(len(np_cleaned[i][j])):
            for key in obj_to_keep:
                if np_cleaned[i][j][k] == str(key):
                    new_list.append([i, np_cleaned[i][j]])

np_obj_to_keep = np.array(new_list)

100%|██████████| 28854/28854 [00:01<00:00, 23009.73it/s]
  np_obj_to_keep = np.array(new_list)


In [69]:
print(np_obj_to_keep.shape)
print(np_obj_to_keep[:10])

(24736, 2)
[[79
  list(['39', '0.0398437', '0.378906', '0.0578125', '0.0859375', '0.819336'])]
 [84
  list(['39', '0.0507812', '0.377344', '0.0546875', '0.0828125', '0.811035'])]
 [85
  list(['39', '0.0539063', '0.377344', '0.0546875', '0.0828125', '0.817871'])]
 [86
  list(['39', '0.0546875', '0.377344', '0.053125', '0.0828125', '0.853516'])]
 [87
  list(['39', '0.0578125', '0.375781', '0.053125', '0.0828125', '0.882812'])]
 [88
  list(['39', '0.0601562', '0.375', '0.0515625', '0.08125', '0.888184'])]
 [89
  list(['39', '0.0632813', '0.375', '0.0515625', '0.08125', '0.865234'])]
 [90 list(['39', '0.0671875', '0.375', '0.05', '0.08125', '0.838867'])]
 [91
  list(['39', '0.0703125', '0.374219', '0.05', '0.0828125', '0.812988'])]
 [93
  list(['39', '0.078125', '0.372656', '0.046875', '0.0828125', '0.872559'])]]


In [70]:
# check if a frame id is used more than once and check how many times it is used
id = 0
id2 = 0
id3 = 0
for i in tqdm(range(len(np_obj_to_keep))):
    if id == np_obj_to_keep[i][0]:
        if id2 == np_obj_to_keep[i][0]:
            print("3 objects detected == > id = " + str(id))
            if id3 == np_obj_to_keep[i][0]:
                id3 = np_obj_to_keep[i][0]
                print("4 objects detected ==> id = " + str(id))
            else :
                id3 = np_obj_to_keep[i][0]
        else :
            id2 = np_obj_to_keep[i][0]
    else:
        id = np_obj_to_keep[i][0]


100%|██████████| 24736/24736 [00:00<00:00, 735765.58it/s]

3 objects detected == > id = 11326
3 objects detected == > id = 12210
3 objects detected == > id = 12211
3 objects detected == > id = 12212
3 objects detected == > id = 15047
3 objects detected == > id = 15049
3 objects detected == > id = 19056
3 objects detected == > id = 19064
3 objects detected == > id = 19066
3 objects detected == > id = 19073
3 objects detected == > id = 21764
3 objects detected == > id = 21765
3 objects detected == > id = 22100
3 objects detected == > id = 22101
3 objects detected == > id = 22102
3 objects detected == > id = 22633
3 objects detected == > id = 22634
3 objects detected == > id = 22635
3 objects detected == > id = 22636
3 objects detected == > id = 22637
3 objects detected == > id = 22638
3 objects detected == > id = 22639
3 objects detected == > id = 22640
3 objects detected == > id = 22763
3 objects detected == > id = 22765
3 objects detected == > id = 22767
3 objects detected == > id = 22868
3 objects detected == > id = 22897
3 objects detected =




In [71]:
# example of frame with 3 objects detected (id = 11326)
print(np_obj_to_keep[9076:9081])

[[11325
  list(['45', '0.882031', '0.523438', '0.173438', '0.140625', '0.810547'])]
 [11326
  list(['45', '0.892969', '0.530469', '0.176563', '0.142188', '0.818359'])]
 [11326
  list(['72', '0.290625', '0.338281', '0.225', '0.239063', '0.824707'])]
 [11326
  list(['43', '0.951563', '0.678125', '0.040625', '0.1', '0.853516'])]
 [11327
  list(['43', '0.960156', '0.684375', '0.0390625', '0.09375', '0.811035'])]]


In [72]:
print(np_obj_to_keep[9076][0])

11325


Previous test show me that the maximum of object detected which we want to keep, in one single frame, is 3.

Ex : in frame 11'326 there is a knife, a bowl and a refrigerator

Since a frame can contains 3 objects, the dataframe will have to have this structure : (frame, [class_1, x, y, w, h, confidence], [class_2, x, y, w, h, confidence], [class_3, x, y, w, h, confidence])

In [73]:
print(type(np_cleaned[9076][0][0]))
print(np_cleaned[9076][0][0])

print(type(int(np_cleaned[9076][0][0])))
print(int(np_cleaned[9076][0][0]))

<class 'str'>
0
<class 'int'>
0


In [74]:
frame_objects = {} # create a dictionary with the frame id as key and the objects detected as value
obj_to_keep = [39, 43, 45, 46, 50, 72] # list of objects we want to keep

for i in tqdm(range(len(np_cleaned))):
    if i in np_obj_to_keep[:, 0]: # if the frame contains at least one object we want to keep
        if i not in frame_objects:  # check if the key exists in the dictionary
            frame_objects[i] = [] # if not, add the key with an empty list as value
        for j in range(len(np_cleaned[i])):
            if int(np_cleaned[i][j][0]) in obj_to_keep: # if the object detected is one we want to keep
                obj = np_cleaned[i][j]
                frame_objects[i].append(obj)
    else:
        frame_objects[i] = [] # if the frame doesn't contain any object we want to keep, add an empty list

# Convert the dictionary to a numpy array
final = np.array([[frame, obj] for frame, obj in frame_objects.items()])

100%|██████████| 28854/28854 [00:12<00:00, 2254.85it/s]
  final = np.array([[frame, obj] for frame, obj in frame_objects.items()])


In [75]:
print(final.shape)
print(final[11320:11330])

(28854, 2)
[[11320
  list([['45', '0.8375', '0.498437', '0.16875', '0.134375', '0.851074']])]
 [11321
  list([['45', '0.846094', '0.502344', '0.170312', '0.135937', '0.814941'], ['43', '0.915625', '0.659375', '0.0375', '0.125', '0.837402']])]
 [11322
  list([['45', '0.853125', '0.507031', '0.171875', '0.135937', '0.831055'], ['43', '0.921875', '0.661719', '0.0375', '0.120312', '0.880371']])]
 [11323
  list([['43', '0.928125', '0.666406', '0.0375', '0.117188', '0.807129'], ['45', '0.8625', '0.511719', '0.16875', '0.135937', '0.814453']])]
 [11324 list([])]
 [11325
  list([['45', '0.882031', '0.523438', '0.173438', '0.140625', '0.810547']])]
 [11326
  list([['45', '0.892969', '0.530469', '0.176563', '0.142188', '0.818359'], ['72', '0.290625', '0.338281', '0.225', '0.239063', '0.824707'], ['43', '0.951563', '0.678125', '0.040625', '0.1', '0.853516']])]
 [11327
  list([['43', '0.960156', '0.684375', '0.0390625', '0.09375', '0.811035'], ['72', '0.297656', '0.338281', '0.220313', '0.239063',

In [76]:
print(final[11329][1][0][1])
print(final[11329][1][0][2])
print(final[11329][1][0][3])
print(final[11329][1][0][4])

0.917969
0.552344
0.164062
0.145313


In [77]:
# adapt the x and y coordinates and the width and height of the bounding box to the size of the image (640x640)
# objet = [class, x, y, width, height, confidence]

# create a copy of the array to keep the original one
final_copy = final.copy()

for i in tqdm(range(len(final_copy))):
    for j in range(len(final_copy[i][1])):
        final_copy[i][1][j][1] = round(float(final_copy[i][1][j][1]) * IMG_WIDTH, 4)
        final_copy[i][1][j][2] = round(float(final_copy[i][1][j][2]) * IMG_HEIGHT, 4)
        final_copy[i][1][j][3] = round(float(final_copy[i][1][j][3]) * IMG_WIDTH, 4)
        final_copy[i][1][j][4] = round(float(final_copy[i][1][j][4]) * IMG_HEIGHT, 4)

100%|██████████| 28854/28854 [00:00<00:00, 54186.93it/s] 


In [78]:
print(final_copy[11329][1][0][1])
print(final_copy[11329][1][0][2])
print(final_copy[11329][1][0][3])
print(final_copy[11329][1][0][4])

275.3907
165.7032
49.2186
43.5939


In [79]:
# create a pandas dataframe with the numpy array and delete the first column
df_final = pd.DataFrame(final_copy)
df_final = df_final.drop(columns=0)

In [80]:
print(df_final.shape)
print(df_final)

(28854, 1)
                                                       1
0                                                     []
1                                                     []
2                                                     []
3                                                     []
4                                                     []
...                                                  ...
28849                                                 []
28850  [[43, 181.875, 187.7343, 12.1875, 58.5936, 0.8...
28851  [[43, 182.3439, 187.2657, 12.1875, 58.5936, 0....
28852  [[43, 182.3439, 187.0314, 12.1875, 59.0625, 0....
28853  [[43, 182.5782, 187.0314, 11.7188, 59.0625, 0....

[28854 rows x 1 columns]


In [81]:
# print the number of frame with at least one object detected
nb_frame_with_obj = len(df_final[df_final[1].map(len) > 0])
percentage = round(nb_frame_with_obj / len(df_final) * 100, 2)
print("Number of frame with at least one object detected : " + str(nb_frame_with_obj) + " (" + str(percentage) + "%)")

Number of frame with at least one object detected : 21068 (73.02%)


In [82]:
# save the dataframe to a csv file
df_final.to_csv('objects_detected.csv', index=False, header=False)