## INSTALL REQUIRED LIBRARIES

In [None]:
!pip install fiftyone

## LOAD DATASET

In [None]:
import os
import shutil
import fiftyone as fo
import fiftyone.zoo as foz

In [None]:
# Number of samples requested from the zoo (passed as `max_samples` below).
DATASET_SIZE = 10000
# Fraction of the samples assigned to the "test" split; the rest is "train".
TEST_SPLIT = 0.1
TRAIN_SPLIT = 1 - TEST_SPLIT

In [None]:
# directory handed to the zoo loader as `dataset_dir`
storage_path = "/content/aircraft_data"

# zoo dataset name, plus the label types and classes passed to the loader
dataset_name = "open-images-v7"
label_types = ["segmentations"]
classes = ["Airplane"]

In [None]:
# Load at most DATASET_SIZE samples from the "train" split of the zoo dataset,
# requesting only the configured label types and classes.
# NOTE(review): confirm whether `classes` also filters the labels inside each
# sample or only selects samples (see the loader's `only_matching` option).
dataset = foz.load_zoo_dataset(
    dataset_name,
    split="train",
    label_types=label_types,
    classes=classes,
    max_samples=DATASET_SIZE,
    dataset_dir=storage_path,
)

## INSPECT DATASET

In [None]:
# Print the field schema reported by the dataset.
print(dataset.get_field_schema())

In [None]:
# Print the dataset's `default_classes` attribute.
print(dataset.default_classes)

In [None]:
# Fetch the first sample and print it to inspect its fields.
sample = dataset.first()
print(sample)

In [None]:
# Print the dataset object's own string representation.
print(dataset)

## EXPORT IMAGES & MASKS

In [None]:
import cv2
import numpy as np
from skimage import measure

In [None]:
# Root of the exported dataset; images, labels and masks each get their own
# sub-directory underneath it.
export_dir = "/content/aircraft_export"

image_dir, label_dir, mask_dir = (
    os.path.join(export_dir, subfolder)
    for subfolder in ("images", "labels", "masks")
)


In [None]:
# Create the train/test sub-directory under each export folder; existing
# directories are left untouched.
for split_name in ("train", "test"):
    for root_dir in (image_dir, label_dir, mask_dir):
        os.makedirs(os.path.join(root_dir, split_name), exist_ok=True)

In [None]:
def resize_mask(mask, bbox, image_shape):
    """Paste a box-sized instance mask onto a full-image canvas.

    Args:
        mask: array-like instance mask covering only the bounding box.
        bbox: relative ``[x, y, width, height]`` box; x/width are scaled by
            the image width and y/height by the image height.
        image_shape: image shape whose first two entries are
            ``(height, width)``.

    Returns:
        A ``uint8`` array of shape ``(height, width)`` that is zero outside
        the box and holds the nearest-neighbour-resized mask inside it. An
        all-zero canvas is returned when the box covers no whole pixel.
    """
    mask = np.array(mask, dtype=np.uint8)
    img_h, img_w = image_shape[0], image_shape[1]

    # absolute bounding box, truncated to whole pixels
    x = int(bbox[0] * img_w)
    y = int(bbox[1] * img_h)
    w = int(bbox[2] * img_w)
    h = int(bbox[3] * img_h)

    mask_full = np.zeros((img_h, img_w), dtype=np.uint8)

    # A box that truncates to zero pixels (or an empty mask) cannot be
    # resized; there is nothing to paste, so return the empty canvas.
    if w <= 0 or h <= 0 or mask.size == 0:
        return mask_full

    mask_resized = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)

    # Clip the paste region to the image so a box that reaches past an edge
    # does not cause a shape mismatch on assignment.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + w, img_w), min(y + h, img_h)
    if x1 <= x0 or y1 <= y0:
        return mask_full

    mask_full[y0:y1, x0:x1] = mask_resized[y0 - y:y1 - y, x0 - x:x1 - x]

    return mask_full

In [None]:
def export_mask(mask, image_name, split):
    """Write a mask to ``mask_dir/<split>/<image_name>`` as an 8-bit image.

    Args:
        mask: numeric array; it is multiplied by 255 and cast to ``uint8``
            (presumably it holds 0/1 values — confirm against the caller).
        image_name: file name used for the mask; its extension selects the
            format ``cv2.imwrite`` encodes with.
        split: name of the sub-directory of ``mask_dir`` to write into.

    Returns:
        The status returned by ``cv2.imwrite`` (truthy on success).
    """
    mask_uint8 = (mask * 255).astype(np.uint8)
    mask_path = os.path.join(mask_dir, split, image_name)

    # NOTE(review): the mask reuses the image's file name, so a ".jpg" image
    # yields a JPEG-encoded mask — confirm whether a lossless format is wanted.
    written = cv2.imwrite(mask_path, mask_uint8)
    if not written:
        # Surface the failure instead of silently dropping the mask.
        print("Warning: could not write mask:", mask_path)
    return written

In [None]:
# Export every single-instance sample as an image plus a full-size mask.
# Samples with several detections are skipped and counted; samples without a
# detection or without a mask are skipped and counted as "no instance".
multi_instance_count = 0
no_instance_count = 0

# Split on the number of samples actually loaded: `max_samples` is a maximum,
# so the dataset may hold fewer than DATASET_SIZE samples.
train_cutoff = TRAIN_SPLIT * len(dataset)

for i, image in enumerate(dataset):
    image_path = image.filepath
    image_name = os.path.basename(image_path)

    split = "train" if i < train_cutoff else "test"
    export_path = os.path.join(image_dir, split, image_name)

    # A sample may carry no label at all; treat that as zero detections.
    ground_truth = image.ground_truth
    detections = ground_truth.detections if ground_truth is not None else []

    # dont include multi
    if len(detections) > 1:
        multi_instance_count += 1
        continue

    # Without a detection or a mask there is nothing to export; skipping the
    # image as well keeps the images/ and masks/ folders paired.
    if len(detections) == 0 or detections[0].mask is None:
        no_instance_count += 1
        continue

    image_array = cv2.imread(image_path)
    if image_array is None:
        # cv2.imread signals an unreadable file by returning None.
        print("Skipping unreadable image:", image_path)
        continue

    detection = detections[0]
    cv2.imwrite(export_path, image_array)

    rm = resize_mask(detection.mask, list(detection.bounding_box), image_array.shape)
    export_mask(rm, image_name, split)


In [None]:
# Report the two counters accumulated by the export loop.
print("Images with multi instances:", multi_instance_count)
print("Images with no instances:", no_instance_count)

## SAVE DATASET TO GOOGLE DRIVE

In [None]:
# Mount Google Drive at /content/drive so the export can be copied to it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Recursively copy the export directory into the mounted Drive folder.
# NOTE(review): the destination is spelled "aicraft_export_3" — confirm this
# is the intended folder name before relying on it downstream.
!scp -r /content/aircraft_export/ "/content/drive/MyDrive/aicraft_export_3"