# Creating a Voxel51 Dataset from SkyScan Images
This notebook creates a Voxel51 (FiftyOne) dataset from images captured by SkyScan. Each image is labeled with the ICAO24 identifier taken from the aircraft's ADS-B broadcast.

In [1]:
# Name of the Voxel51 dataset. If a dataset with this name already exists,
# it is loaded instead of being created.
dataset_name = "test-dataset"

# Directory holding the SkyScan images. Mount it into the container as a
# Docker volume so that it is visible at this path.
image_dir = "/tf/testing"

In [6]:
import glob
import fiftyone as fo
import os

In [4]:
def buildImageList(filePath, extensions=(".jpg",)):
    """Recursively collect image files below ``filePath``.

    The file name without its extension becomes the ``external_id``; the
    part of it before the first underscore is used as the ICAO24 identifier.

    Args:
        filePath: Root directory to search. Every sub-directory is walked.
        extensions: File-name suffix (or iterable of suffixes) to accept.
            Matching is case-insensitive, so ``.JPG`` is accepted alongside
            ``.jpg``. Defaults to ``(".jpg",)``.

    Returns:
        A list with one dict per matching file, holding the keys
        ``file_path`` (absolute path), ``external_id`` and ``icao24``.
        Entries are ordered by directory and then by file name, so repeated
        runs yield the same order. The list is empty when nothing matches.
    """
    # Accept a single suffix given as a plain string as well as an iterable.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    image_list = []
    for folder, subfolders, files in os.walk(filePath):
        # Sorting in place fixes the order in which os.walk descends, which
        # would otherwise depend on the underlying filesystem.
        subfolders.sort()
        for file in sorted(files):
            if not file.lower().endswith(suffixes):
                continue
            # os.walk already yields bare file names, so no basename() needed.
            external_id = os.path.splitext(file)[0]
            image_list.append({
                "file_path": os.path.abspath(os.path.join(folder, file)),
                "external_id": external_id,
                "icao24": external_id.split("_")[0],
            })
    return image_list


In [8]:
# Create the dataset, or load it when one with this name already exists.
# Only the constructor is guarded so that an unrelated ValueError raised
# elsewhere is not mistaken for "dataset already exists".
try:
    dataset = fo.Dataset(name=dataset_name)
except ValueError:  # The dataset already exists, so load it instead
    dataset = fo.load_dataset(name=dataset_name)
    print("Loaded {} dataset".format(dataset_name))
else:
    # Keep the newly created dataset in the database after this session ends.
    dataset.persistent = True
    print("Created {} dataset".format(dataset_name))

image_list = buildImageList(image_dir)

# Re-running this cell against an existing dataset must not add the same
# image twice, so every file that is already a sample is skipped.
existing_filepaths = set(dataset.values("filepath"))

new_samples = []
for image in image_list:
    if image["file_path"] in existing_filepaths:
        continue

    sample = fo.Sample(filepath=image["file_path"])
    sample["external_id"] = fo.Classification(label=image["external_id"])
    sample["icao24"] = fo.Classification(label=image["icao24"])
    new_samples.append(sample)

# Add all new samples in one batch rather than one call per image.
if new_samples:
    dataset.add_samples(new_samples)

print("Added {} new samples ({} already in the dataset)".format(
    len(new_samples), len(image_list) - len(new_samples)))

Loaded test-dataset dataset
