# FiftyOne local walkthrough (quickstart_video)

In [1]:
import fiftyone as fo
import fiftyone.zoo as foz

In [2]:
print(fo.list_datasets())

['2024.07.15.15.23.53', '2024.07.15.15.27.44', 'custom_yolo_person', 'custom_yolo_train', 'custom_yolo_train_person', 'custom_yolo_val', 'mscoco2017-sama_train', 'mscoco2017-sama_val', 'mscoco2017_train', 'mscoco2017_val', 'quickstart', 'quickstart-video', 'quickstart-video-frames']


# Dataset

### Loading dataset zoo provided by FiftyOne

In [3]:
foz.load_zoo_dataset?

[1;31mSignature:[0m
[0mfoz[0m[1;33m.[0m[0mload_zoo_dataset[0m[1;33m([0m[1;33m
[0m    [0mname_or_url[0m[1;33m,[0m[1;33m
[0m    [0msplit[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0msplits[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mlabel_field[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mdataset_name[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mdownload_if_necessary[0m[1;33m=[0m[1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mdrop_existing_dataset[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mpersistent[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0moverwrite[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mcleanup[0m[1;33m=[0m[1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mprogress[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [1;33m**[0m[0mkwargs[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
L

## Load name

### Load new/overwrite existing

In [4]:
# quickstart dataset
# dataset_dir = "D:\\dataset_d\\fiftyone_quickstart_video"
dataset = foz.load_zoo_dataset("quickstart-video", progress=True, overwrite=True)

Overwriting existing directory 'C:\Users\Legion\fiftyone\quickstart-video'
Downloading dataset to 'C:\Users\Legion\fiftyone\quickstart-video'
Downloading dataset...
 100% |████████████████████████████████████████████|  281.7Mb/281.7Mb [55.7s elapsed, 0s remaining, 4.4Mb/s]       
Extracting dataset...
Parsing dataset metadata
Found 10 samples
Dataset info written to 'C:\Users\Legion\fiftyone\quickstart-video\info.json'
Loading existing dataset 'quickstart-video'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [5]:
print(fo.list_datasets())

['2024.07.15.15.23.53', '2024.07.15.15.27.44', 'custom_yolo_person', 'custom_yolo_train', 'custom_yolo_train_person', 'custom_yolo_val', 'mscoco2017-sama_train', 'mscoco2017-sama_val', 'mscoco2017_train', 'mscoco2017_val', 'quickstart', 'quickstart-video', 'quickstart-video-frames']


### Load existing

In [28]:
dataset = fo.load_dataset("quickstart-video")

## Print dataset specification

In [6]:
print(dataset.media_type)

video


In [7]:
print(dataset)

Name:        quickstart-video
Media type:  video
Num samples: 10
Persistent:  False
Tags:        []
Sample fields:
    id:               fiftyone.core.fields.ObjectIdField
    filepath:         fiftyone.core.fields.StringField
    tags:             fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:         fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.VideoMetadata)
    created_at:       fiftyone.core.fields.DateTimeField
    last_modified_at: fiftyone.core.fields.DateTimeField
Frame fields:
    id:               fiftyone.core.fields.ObjectIdField
    frame_number:     fiftyone.core.fields.FrameNumberField
    created_at:       fiftyone.core.fields.DateTimeField
    last_modified_at: fiftyone.core.fields.DateTimeField
    detections:       fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)


## Delete dataset (optional)

In [16]:
dataset.delete()

## Loading offline dataset

In [6]:
# # Load COCO formatted dataset
# coco_dataset = fo.Dataset.from_dir(
#     name="mscoco2017_train",
#     dataset_type=fo.types.COCODetectionDataset,
#     data_path="D:\\dataset_d\\mscoco\\coco_train2017\\train2017\\",
#     labels_path="D:\\dataset_d\\mscoco\\coco_ann2017\\annotations\\instances_train2017.json",
#     include_id=True,
# )

# # Verify that the class list for our dataset was imported
# print(coco_dataset.default_classes)  # ['airplane', 'apple', ...]

# print(coco_dataset)

# set directory and labels
train_dir = "D:\\dataset_d\\mscoco\\coco_train2017\\train2017\\"
val_dir = "D:\\dataset_d\\mscoco\\coco_val2017\\val2017\\"
train_labels = "D:\\dataset_d\\mscoco-sama\\sama_coco_labels_train.json"
val_labels = "D:\\dataset_d\\mscoco-sama\\sama_coco_labels_val.json"

# Load Sama-COCO dataset
coco_sama_train = fo.Dataset.from_dir(
    name="mscoco2017-sama_train",
    dataset_type=fo.types.COCODetectionDataset,
    data_path=train_dir,
    labels_path=train_labels,
    include_id=True,
    persistent=False,
    overwrite=False
)

coco_sama_val = fo.Dataset.from_dir(
    name="mscoco2017-sama_val",
    dataset_type=fo.types.COCODetectionDataset,
    data_path=val_dir,
    labels_path=val_labels,
    include_id=True,
    persistent=False,
    overwrite=False
)



 100% |███████████| 118287/118287 [44.9m elapsed, 0s remaining, 34.2 samples/s]      
 100% |███████████████| 5000/5000 [1.9m elapsed, 0s remaining, 52.2 samples/s]      


In [19]:
# # Combine the train and val datasets into a single FiftyOne dataset
# dataset = fo.Dataset(name="mscoco2017-sama_dataset", overwrite=True)
# dataset.add_samples(coco_sama_train)
# dataset.add_samples(coco_sama_val)

## Filters

In [7]:
train_view = coco_sama_train.filter_labels("detections", fo.ViewField("label") == "person")
val_view = coco_sama_val.filter_labels("detections", fo.ViewField("label") == "person")
# coco_sama_view = dataset.filter_labels("detections", fo.ViewField("label") == "person")

In [10]:
print(train_view)
print(val_view)

Dataset:     mscoco2017-sama_train
Media type:  image
Num samples: 65883
Sample fields:
    id:               fiftyone.core.fields.ObjectIdField
    filepath:         fiftyone.core.fields.StringField
    tags:             fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:         fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    created_at:       fiftyone.core.fields.DateTimeField
    last_modified_at: fiftyone.core.fields.DateTimeField
    detections:       fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    segmentations:    fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    coco_id:          fiftyone.core.fields.IntField
View stages:
    1. FilterLabels(field='detections', filter={'$eq': ['$$this.label', 'person']}, only_matches=True, trajectories=False)
Dataset:     mscoco2017-sama_val
Media type:  image
Num samples: 2776
Sample fields:
    id:               fifty

## Start app

In [20]:
# # delete from database
# dataset.delete()

In [8]:
session = fo.launch_app(dataset)

# Export

## Export (FiftyOneDataset)

In [9]:
export_dir = "D:\\dataset_d\\quickstart-video-test\\"

# Export the dataset without copying the media files
dataset.export(
    export_dir=export_dir,
    dataset_type=fo.types.FiftyOneDataset,
    export_media=True
)

# # Export the dataset without media, including only the relative path of
# # each image with respect to the given `rel_dir` so that the dataset
# # can be imported with a different `rel_dir` prepended later
# dataset_or_view.export(
#     export_dir=export_dir,
#     dataset_type=fo.types.FiftyOneDataset,
#     export_media=False,
#     rel_dir="/common/images/dir",
# )

Directory 'D:\dataset_d\quickstart-video-test\' already exists; export will be merged with existing files
Exporting samples...
 100% |██████████████████████| 10/10 [60.9ms elapsed, 0s remaining, 164.1 docs/s] 
Exporting frames...
 100% |██████████████████| 1279/1279 [409.9ms elapsed, 0s remaining, 3.1K docs/s]      


In [15]:
import json

# Specify the path to your JSON file
json_file_path = "D:\\dataset_d\\quickstart-video-test\\frames.json"

# Load and parse the JSON file
with open(json_file_path, "r") as file:
    data = json.load(file)

# Check if the JSON data is a list and has at least one element
if isinstance(data, list) and len(data) > 0:
    first_occurrence = data[0]

    # List the properties (keys) of the first occurrence
    properties = first_occurrence.keys()

    print("Properties of the first occurrence:")
    for prop in properties:
        print(prop)
else:
    print("The JSON data is not a list or is empty.")

The JSON data is not a list or is empty.


## Export (FiftyOneVideoLabelsDataset)

In [18]:
# Iterate over the video samples
for n, sample in enumerate(dataset):
    # Get the sample ID
    sample_id = sample.id
    print(f"Sample {n} ID: {sample_id}")

    # Get the frames of the video sample
    frames = sample.frames
    # print(frames)

    # Iterate through the frames
    for frame_number, frame in frames.items():
        print(f"Frame number: {frame_number}")

        # Access frame-level fields
        # print(len(frame.detections.detections)) # find amount of detections
        print(frame.detections.detections)
        # frame_filepath = frame.filepath
        # frame_metadata = frame.metadata

        # print(f"Frame filepath: {frame_filepath}")
        # print(f"Frame metadata: {frame_metadata}")


Sample 0 ID: 67513dd0716fae05a741c9ed
Frame number: 1
[<Detection: {
    'id': '67513dcf716fae05a741c800',
    'attributes': {},
    'tags': [],
    'label': 'vehicle',
    'bounding_box': [0.869835, 0.749655, 0.13016499999999998, 0.248162],
    'mask': None,
    'confidence': None,
    'index': 0,
    'type': 'sedan',
}>, <Detection: {
    'id': '67513dcf716fae05a741c801',
    'attributes': {},
    'tags': [],
    'label': 'road sign',
    'bounding_box': [
        0.906912,
        0.409208,
        0.06508199999999997,
        0.11029500000000003,
    ],
    'mask': None,
    'confidence': None,
    'index': 1,
    'type': 'stop',
}>, <Detection: {
    'id': '67513dcf716fae05a741c802',
    'attributes': {},
    'tags': [],
    'label': 'road sign',
    'bounding_box': [0.13433, 0.268612, 0.101239, 0.09742699999999999],
    'mask': None,
    'confidence': None,
    'index': 2,
    'type': 'other',
}>, <Detection: {
    'id': '67513dcf716fae05a741c803',
    'attributes': {},
    'tags

In [20]:
import fiftyone as fo

export_dir = "D:\\dataset_d\\quickstart-video-FiftyOneVideoLabelsDataset\\"
# label_field = "ground_truth"  # for example
label_field = frame.detections.detections

# Export the dataset
dataset.export(
    export_dir=export_dir,
    dataset_type=fo.types.FiftyOneVideoLabelsDataset,
    label_field=label_field,
)

Directory 'D:\dataset_d\quickstart-video-FiftyOneVideoLabelsDataset\' already exists; export will be merged with existing files


TypeError: unhashable type: 'BaseList'

## Export (COCO)
Needs to convert into image first

In [29]:
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.utils.video as fouv
import fiftyone.core.collections as collections

# # Load the quickstart-video dataset
# video_dataset = foz.load_zoo_dataset("quickstart-video")

# Create a new dataset for the extracted frames
image_dataset = fo.Dataset("quickstart-video-frames", overwrite=True)

In [37]:
output_dir = "D:\\dataset_d\\quickstart-video-frames\\"
samples = fouv.sample_videos(
    dataset,
    output_dir=output_dir,
    force_sample=True
)



 100% |███████████████████| 10/10 [5.0s elapsed, 0s remaining, 2.1 samples/s]      


In [41]:
import glob
a = glob.glob(f"{output_dir}*")
print(a)

['D:\\dataset_d\\quickstart-video-frames\\0587e1cfc2344523922652d8b227fba4-000014-video_052', 'D:\\dataset_d\\quickstart-video-frames\\0587e1cfc2344523922652d8b227fba4-000014-video_164', 'D:\\dataset_d\\quickstart-video-frames\\0fvbpytzBOM_068', 'D:\\dataset_d\\quickstart-video-frames\\0fvbpytzBOM_120', 'D:\\dataset_d\\quickstart-video-frames\\jjoOEAS7pB0_015-3', 'D:\\dataset_d\\quickstart-video-frames\\mGL3LHsFckQ_001-4', 'D:\\dataset_d\\quickstart-video-frames\\Ulcb3AjxM5g_053-1', 'D:\\dataset_d\\quickstart-video-frames\\xbzSaNGNQwI_008-1', 'D:\\dataset_d\\quickstart-video-frames\\xbzSaNGNQwI_008-3', 'D:\\dataset_d\\quickstart-video-frames\\ZSbUmCNUfRs_001-4']


In [46]:
for frame_number, frame in frames.items():
    image_sample = fo.Sample(filepath=frame)
    image_dataset.add_sample(image_sample)

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]


In [34]:
# Iterate over the video samples
# print(type(dataset))
for sample in dataset:
    video_path = sample.filepath
    print(video_path)
        
    # Extract frames from the video
    frames = fouv.sample_videos(
        dataset,
        output_dir="D:\\dataset_d\\quickstart-video-frames\\",
    )
    # print(frames)
    # # Add frames to the new image dataset
    # for frame_number, frame in frames.items():
    #     image_sample = fo.Sample(filepath=frame)
    #     image_dataset.add_sample(image_sample)

# import fiftyone as fo
# import fiftyone.zoo as foz
# import fiftyone.utils.video as fouv

# # Specify the output directory to save the sampled frames
# output_dir = "D:\\dataset_d\\quickstart-video-frames\\"

# # Sample frames from the videos
# samples = fouv.sample_videos(
#     dataset,
#     # every_n_frames=30,  # Sample one frame every 30 frames
#     output_dir=output_dir
# )

print(f"Sampled frames saved to {output_dir}")

C:\Users\Legion\fiftyone\quickstart-video\data\Ulcb3AjxM5g_053-1.mp4


ValueError: Expected samples to be a <class 'fiftyone.core.collections.SampleCollection'>; found <class 'fiftyone.core.sample.Sample'>

In [None]:
# Launch the FiftyOne app to view the image dataset
session = fo.launch_app(image_dataset)

In [17]:
# # Define the export directories
# train_export_dir = "D:\\dataset_d\\mscoco-sama_person\\coco2017-sama_person_train"
# val_export_dir = "D:\\dataset_d\\mscoco-sama_person\\coco2017-sama_person_val"
video_export_dir = "D:\\dataset_d\\quickstart-video-test-coco\\"

dataset.export(
    export_dir=video_export_dir,
    dataset_type=fo.types.COCODetectionDataset,
    label_field="detections"
)

# # Export the filtered train dataset in COCO format
# train_view.export(
#     export_dir=val_export_dir,
#     dataset_type=fo.types.COCODetectionDataset,
#     label_field="detections"
# )

# # Export the filtered validation dataset in COCO format
# val_view.export(
#     export_dir=val_export_dir,
#     dataset_type=fo.types.COCODetectionDataset,
#     label_field="detections"
# )
print("export done")




   0% |/------------------|  0/10 [15.0ms elapsed, ? remaining, ? samples/s] 


ValueError: Expected media type 'image' but found 'video' for filepath 'C:\Users\Legion\fiftyone\quickstart-video\data\Ulcb3AjxM5g_053-1.mp4'

In [None]:
# dataset = foz.load_zoo_dataset(
#     "coco-2017",
#     split=["train","val"]
#     label_types=["detections", "segmentations"],
#     classes=["person"],
#     # max_samples=50,
# )

## Export (Ultralytics YOLO)

In [7]:
# # Define the export directories
train_export_dir = "D:\\dataset_d\\mscoco-sama_person_yolo\\train"
val_export_dir = "D:\\dataset_d\\mscoco-sama_person_yolo\\val"

# Export the filtered train dataset in YOLO format
train_view.export(
    export_dir=train_export_dir,
    dataset_type=fo.types.YOLOv5Dataset,
    label_field="detections"
)

# Export the filtered validation dataset in YOLO format
val_view.export(
    export_dir=val_export_dir,
    dataset_type=fo.types.YOLOv5Dataset,
    label_field="detections"
)
print("export done")



 100% |█████████████| 65883/65883 [20.4m elapsed, 0s remaining, 49.8 samples/s]      
 100% |███████████████| 2776/2776 [52.4s elapsed, 0s remaining, 61.9 samples/s]      
export done


# Evaluation

In [32]:
import os
os.getcwd()

'C:\\Users\\Legion\\conda_notebook\\FiftyOne'