# Dataset Creation

In [1]:
import os
from math import log10
import random
import json
import cv2

## Defining the Functions Needed

In [2]:
def left_pad_number(number: int, target_len: int=6):
    """Return ``number`` as a decimal string, left-padded with zeros.

    Args:
        number: Non-negative integer to format.
        target_len: Minimum width of the returned string. Defaults to 6.

    Returns:
        ``str(number)`` padded on the left with ``'0'`` up to ``target_len``
        characters. Numbers that already have ``target_len`` digits or more
        are returned unpadded.

    Raises:
        ValueError: If ``number`` is negative.
    """
    if number < 0:
        raise ValueError('number must be non-negative')
    # zfill honours target_len for every input (including 0 and numbers wider
    # than target_len) and avoids counting digits through a float log10.
    return str(number).zfill(target_len)

def get_file_name_without_extension(file_path: str):
    """Return the last component of a ``/``-separated path without its extension.

    Only the final extension is removed, so dots inside the name are kept
    (``'clip.v2.mp4'`` -> ``'clip.v2'``) and a name with no extension is
    returned unchanged.

    Args:
        file_path: Path that uses ``/`` as the separator.

    Returns:
        The file name with its final extension stripped.
    """
    file_name = file_path.split('/')[-1]
    return os.path.splitext(file_name)[0]

In [3]:
# Sanity check: strip the folder and extension from a video path, then build
# a frame file name from it with a zero-padded frame number.
name = get_file_name_without_extension('datasets/MOBDrone/videos/DJI_0804_0001_30m_1.mp4')
print(name)
print(f'{name}_{left_pad_number(21)}.PNG')

DJI_0804_0001_30m_1
DJI_0804_0001_30m_1_000021.PNG


In [4]:
def split_video_into_images(video_path, frames_to_take: int=-1):
    """Extract frames from a video as in-memory images.

    Each frame gets a file name of the form
    ``<video name>_<zero-padded frame index>.PNG``,
    e.g. ``DJI_0804_0001_30m_1_000004.PNG``.

    Args:
        video_path: Path of the video file to read.
        frames_to_take: Number of frames to pick at random, without
            replacement. ``-1`` (the default) takes every frame in order.
            A request larger than the number of frames in the video is
            capped to the whole video.

    Returns:
        A list of dicts with the keys ``'path'`` (generated image file name)
        and ``'image'`` (the frame returned by ``VideoCapture.read``). Frames
        that cannot be read are left out.
    """
    video_name = get_file_name_without_extension(video_path)
    video = cv2.VideoCapture(video_path)
    try:
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        if frames_to_take == -1:
            frame_indexes = range(num_frames)
        else:
            # Cap the sample size so short or unreadable videos do not make
            # random.sample raise "sample larger than population".
            sample_size = min(frames_to_take, max(num_frames, 0))
            frame_indexes = random.sample(range(num_frames), sample_size)
        ans = []
        for f in frame_indexes:
            video.set(cv2.CAP_PROP_POS_FRAMES, f)
            # read() returns (success flag, frame); the frame is unusable
            # when the flag is False.
            success, image = video.read()
            if not success:
                continue
            image_path = f'{video_name}_{left_pad_number(f)}.PNG'
            ans.append({
                'path': image_path,
                'image': image
            })
        return ans
    finally:
        # Always free the decoder and file handle held by the capture.
        video.release()

In [5]:
# Try the splitter on a single video, asking for 5 randomly chosen frames.
split_video_into_images(
    'D:/VIT/year4/sem8/Capstone/datasets/MOBDrone/videos/DJI_0804_0001_30m_1.mp4',
    5
)

[{'path': 'DJI_0804_0001_30m_1_000440.PNG',
  'image': array([[[ 65,  75,  30],
          [ 65,  75,  30],
          [ 65,  75,  30],
          ...,
          [ 72,  79,  30],
          [ 72,  79,  30],
          [ 72,  79,  30]],
  
         [[ 65,  75,  30],
          [ 65,  75,  30],
          [ 65,  75,  30],
          ...,
          [ 72,  79,  30],
          [ 72,  79,  30],
          [ 72,  79,  30]],
  
         [[ 65,  75,  30],
          [ 65,  75,  30],
          [ 65,  75,  30],
          ...,
          [ 72,  79,  30],
          [ 72,  79,  30],
          [ 72,  79,  30]],
  
         ...,
  
         [[ 66,  79,  32],
          [ 66,  79,  32],
          [ 66,  79,  32],
          ...,
          [ 85,  86,  49],
          [ 99, 100,  63],
          [104, 105,  68]],
  
         [[ 66,  80,  30],
          [ 66,  80,  30],
          [ 66,  80,  30],
          ...,
          [ 82,  83,  46],
          [ 95,  96,  59],
          [ 99, 100,  63]],
  
         [[ 64,  78,  28]

In [6]:
def get_all_files_from_folder(folder_path):
    """List the regular files directly inside ``folder_path``.

    Sub-directories are skipped and the folder is not searched recursively.
    Entries are returned sorted by name so repeated runs visit the files in
    the same order regardless of the order the filesystem reports them in.

    Args:
        folder_path: Directory to list.

    Returns:
        A list of file paths (``folder_path`` joined with each file name),
        with backslashes replaced by ``/``.
    """
    file_paths = []
    for entry in sorted(os.listdir(folder_path)):
        full_path = os.path.join(folder_path, entry)
        if os.path.isfile(full_path):
            file_paths.append(full_path.replace('\\', '/'))
    return file_paths

## Taking Random frames from each video

In [7]:
def get_images_for_each_video_from_folder(folder_path, frames_to_take: int=-1):
    """Collect frames from every file found directly in ``folder_path``.

    Args:
        folder_path: Directory whose files are passed to
            ``split_video_into_images`` one by one.
        frames_to_take: Forwarded unchanged to ``split_video_into_images``
            for each file.

    Returns:
        One flat list holding the frame dicts of all files, in the order the
        files are listed.
    """
    collected = []
    for video_file in get_all_files_from_folder(folder_path):
        collected.extend(split_video_into_images(video_file, frames_to_take))
    return collected

In [8]:
# Take 1 random frame from every file in the videos folder.
images = get_images_for_each_video_from_folder('D:/VIT/year4/sem8/Capstone/datasets/MOBDrone/videos', 1)

## Splitting into test and training datasets

- [The MOBDrone site](https://aimh.isti.cnr.it/dataset/mobdrone/) says that files starting with “DJI_0804” (total: 37,604 images) are testing images and files starting with “DJI_0915” (total: 88,568 images) are training images.
- Training/testing on every single image is not feasible on my laptop, so the dataset is created by randomly sampling frames from each video.
- Training/testing will be performed on this random sample until I can find a way to store and train on the complete dataset.

In [9]:
def image_type(image_path):
    """Return the dataset split an image belongs to, based on its file name.

    Args:
        image_path: Path or file name of the image.

    Returns:
        ``'training'`` when the file name starts with ``DJI_0915``,
        ``'testing'`` for every other name.
    """
    stem = get_file_name_without_extension(image_path)
    return 'training' if stem.startswith('DJI_0915') else 'testing'

In [10]:
# List each sampled frame together with the split it is assigned to.
for image in images:
    print(f"name: {image['path']}, type: {image_type(image['path'])}")

name: DJI_0804_0001_30m_1_000784.PNG, type: testing
name: DJI_0804_0001_30m_2_000491.PNG, type: testing
name: DJI_0804_0001_30m_3_000142.PNG, type: testing
name: DJI_0804_0002_30m_1_000364.PNG, type: testing
name: DJI_0804_0002_30m_2_001099.PNG, type: testing
name: DJI_0804_0003_10m_000132.PNG, type: testing
name: DJI_0804_0004_20m_001358.PNG, type: testing
name: DJI_0804_0005_30m_000450.PNG, type: testing
name: DJI_0804_0006_40m_001403.PNG, type: testing
name: DJI_0804_0007_50m_000171.PNG, type: testing
name: DJI_0804_0008_20m_001131.PNG, type: testing
name: DJI_0804_0009_40m_000293.PNG, type: testing
name: DJI_0804_0010_60m_000678.PNG, type: testing
name: DJI_0804_0011_30m_1_000498.PNG, type: testing
name: DJI_0804_0011_30m_2_000169.PNG, type: testing
name: DJI_0804_0012_50m_1_000415.PNG, type: testing
name: DJI_0804_0012_50m_2_000011.PNG, type: testing
name: DJI_0804_0012_50m_3_001390.PNG, type: testing
name: DJI_0804_0013_40m_000076.PNG, type: testing
name: DJI_0804_0014_50m_1_0000

## Getting the annotations for the images taken

In [16]:
def get_coco_annotations(
    images,
    only_annotated_images: bool=False,
    complete_annotations_file: str='D:/VIT/year4/sem8/Capstone/datasets/MOBDrone/annotations/5-class-annotations.json',
):
    """Build a COCO-style annotation dict restricted to the given images.

    Args:
        images: Iterable of dicts that each have a ``'path'`` key holding the
            image file name; it is matched against ``file_name`` in the
            annotations file.
        only_annotated_images: When True, the ``'images'`` list keeps only
            images that have at least one annotation.
        complete_annotations_file: Path of the JSON file holding the complete
            annotations. It is read for its ``'categories'``, ``'images'``
            and ``'annotations'`` keys. Defaults to the MOBDrone 5-class
            annotations file.

    Returns:
        A dict with the keys ``'categories'`` (copied from the file),
        ``'images'`` and ``'annotations'`` (the entries that belong to the
        given images, in file order).
    """
    with open(complete_annotations_file, 'r') as file:
        annotation_data = json.load(file)

    image_paths = {image['path'] for image in images}

    selected_images = [
        image for image in annotation_data['images']
        if image['file_name'] in image_paths
    ]
    # Annotation count per selected image id; the keys double as the set of
    # selected ids when filtering the annotations below.
    num_anns = {image['id']: 0 for image in selected_images}

    selected_annotations = []
    for an in annotation_data['annotations']:
        if an['image_id'] in num_anns:
            selected_annotations.append(an)
            num_anns[an['image_id']] += 1

    if only_annotated_images:
        selected_images = [
            image for image in selected_images
            if num_anns[image['id']] > 0
        ]

    return {
        'categories': annotation_data['categories'],
        'images': selected_images,
        'annotations': selected_annotations,
    }

In [18]:
# Annotations for the sampled frames, keeping only images that have at least one annotation.
get_coco_annotations(images, True)

{785: 0, 1751: 0, 2189: 1, 2693: 0, 5040: 0, 5652: 0, 7836: 0, 8376: 0, 10733: 1, 10960: 1, 13370: 0, 14008: 4, 16273: 3, 17761: 3, 19251: 2, 20558: 0, 22250: 3, 23687: 3, 24178: 7, 25014: 5, 26590: 14, 27833: 14, 28925: 4, 31962: 3, 33810: 2, 37583: 5, 40786: 0, 41709: 0, 43487: 1, 45635: 1, 49695: 1, 53572: 0, 53793: 1, 54270: 1, 54867: 0, 61543: 2, 63661: 1, 64806: 1, 65102: 0, 71167: 1, 76053: 1, 79440: 0, 83987: 3, 89719: 2, 90822: 1, 91753: 2, 92020: 1, 93523: 1, 97364: 2, 98689: 1, 99708: 0, 100837: 2, 105023: 1, 107336: 1, 108354: 1, 108924: 1, 111261: 1, 112548: 1, 115189: 0, 117526: 2, 119371: 1, 120704: 1, 121215: 1, 121861: 1, 124927: 0}


{'categories': [{'id': 1, 'name': 'person', 'supercategory': ''},
  {'id': 2, 'name': 'boat', 'supercategory': ''},
  {'id': 3, 'name': 'surfboard', 'supercategory': ''},
  {'id': 4, 'name': 'wood', 'supercategory': ''},
  {'id': 5, 'name': 'life_buoy', 'supercategory': ''}],
 'images': [{'id': 2189,
   'width': 1920,
   'height': 1012,
   'file_name': 'DJI_0804_0001_30m_3_000142.PNG',
   'license': 0,
   'flickr_url': '',
   'coco_url': '',
   'date_captured': 0},
  {'id': 10733,
   'width': 1920,
   'height': 1012,
   'file_name': 'DJI_0804_0006_40m_001403.PNG',
   'license': 0,
   'flickr_url': '',
   'coco_url': '',
   'date_captured': 0},
  {'id': 10960,
   'width': 1920,
   'height': 1012,
   'file_name': 'DJI_0804_0007_50m_000171.PNG',
   'license': 0,
   'flickr_url': '',
   'coco_url': '',
   'date_captured': 0},
  {'id': 14008,
   'width': 1920,
   'height': 1012,
   'file_name': 'DJI_0804_0009_40m_000293.PNG',
   'license': 0,
   'flickr_url': '',
   'coco_url': '',
   'date

## Consolidating Everything

In [13]:
def create_dataset(source_folder: str, destination_folder: str, images_per_video: int=-1, only_annotated_image_in_annotations_file: bool=False):
    """Sample frames from every video and write them out as a dataset.

    Frames are written to ``<destination_folder>/training`` or
    ``<destination_folder>/testing`` according to ``image_type``, and one
    ``annotations.json`` covering all written frames is saved in
    ``destination_folder``.

    Args:
        source_folder: Folder containing the source videos.
        destination_folder: Folder the dataset is written to; it is created,
            together with its ``training`` and ``testing`` sub-folders, when
            missing.
        images_per_video: Number of random frames to take from each video;
            ``-1`` takes every frame.
        only_annotated_image_in_annotations_file: When True, the annotations
            file lists only images that have at least one annotation. The
            image files themselves are written either way.
    """
    # Read from the folder the caller asked for, not a fixed location.
    images = get_images_for_each_video_from_folder(source_folder, images_per_video)
    annotations = get_coco_annotations(images, only_annotated_image_in_annotations_file)

    # makedirs also creates destination_folder itself when it does not exist.
    for split_name in ('training', 'testing'):
        os.makedirs(os.path.join(destination_folder, split_name), exist_ok=True)

    for image in images:
        cv2.imwrite(f"{destination_folder}/{image_type(image['path'])}/{image['path']}", image['image'])

    with open(f'{destination_folder}/annotations.json', 'w') as file:
        json.dump(annotations, file)

## Creating the Datasets

In [20]:
# Disabled call: one frame per video, written to single_image_set.
# create_dataset(
#     'D:/VIT/year4/sem8/Capstone/datasets/MOBDrone/videos',
#     'D:/VIT/year4/sem8/Capstone/datasets/MOBDrone/single_image_set',
#     1,
#     only_annotated_image_in_annotations_file=True
# )

{275: 4, 1499: 8, 2080: 0, 3693: 0, 4469: 3, 6044: 0, 7820: 0, 8592: 0, 10245: 0, 11584: 2, 13516: 2, 15256: 2, 15808: 4, 17746: 3, 19642: 0, 21049: 1, 22289: 3, 23832: 3, 24394: 6, 26406: 1, 27072: 13, 27803: 14, 31013: 0, 31710: 3, 34246: 0, 36465: 0, 38670: 0, 41056: 0, 43140: 2, 45110: 0, 46537: 1, 51033: 1, 54138: 1, 54689: 4, 54754: 1, 61319: 2, 63703: 1, 64780: 1, 66331: 1, 70526: 0, 76840: 1, 79709: 2, 83263: 1, 89670: 2, 91153: 0, 91487: 4, 92185: 1, 93516: 1, 95395: 1, 98952: 1, 99866: 1, 101779: 1, 102964: 2, 106268: 1, 108476: 1, 108800: 2, 110498: 1, 112895: 1, 114035: 1, 116852: 0, 118466: 1, 119831: 0, 121503: 1, 123256: 0, 125903: 2}


In [21]:
# Build the dataset with 10 frames per video under ten_image_set; only
# annotated images are listed in its annotations.json.
create_dataset(
    'D:/VIT/year4/sem8/Capstone/datasets/MOBDrone/videos',
    'D:/VIT/year4/sem8/Capstone/datasets/MOBDrone/ten_image_set',
    10,
    only_annotated_image_in_annotations_file=True
)

{103: 6, 192: 3, 292: 4, 387: 3, 487: 2, 645: 0, 677: 0, 716: 0, 924: 0, 971: 0, 1299: 0, 1376: 7, 1747: 0, 1749: 0, 1776: 0, 1891: 0, 1921: 0, 1928: 0, 1955: 0, 2032: 0, 2052: 0, 2088: 0, 2091: 0, 2180: 1, 2195: 1, 2212: 2, 2230: 3, 2239: 3, 2288: 3, 2320: 2, 2442: 2, 2555: 0, 2647: 0, 2709: 0, 2773: 0, 2869: 1, 3039: 1, 3177: 1, 3275: 0, 3433: 0, 3963: 0, 4182: 0, 4442: 3, 4635: 3, 4667: 3, 4749: 3, 4941: 0, 4980: 0, 5028: 0, 5034: 0, 5551: 1, 5599: 1, 5640: 0, 5652: 0, 5781: 0, 6078: 0, 6127: 0, 6453: 0, 6457: 0, 6463: 0, 6488: 0, 6595: 2, 6745: 0, 7063: 0, 7431: 0, 7500: 0, 7560: 2, 7658: 1, 7730: 0, 7854: 0, 7993: 2, 8219: 0, 8416: 0, 8614: 0, 8747: 0, 8998: 0, 9023: 0, 9101: 1, 9161: 2, 9189: 2, 9613: 2, 9622: 2, 9682: 2, 9843: 2, 10053: 0, 10091: 0, 10258: 0, 10487: 2, 10610: 2, 10647: 1, 11126: 3, 11271: 2, 11372: 2, 11374: 2, 11415: 2, 11612: 0, 11831: 0, 11870: 0, 11995: 2, 12159: 3, 12286: 0, 12342: 0, 12390: 0, 12563: 0, 12700: 1, 13053: 0, 13175: 0, 13292: 0, 13419: 0, 135