In [1]:
from constants import DATA_DIR
from credentials import FIFTYONE_CVAT_USERNAME, FIFTYONE_CVAT_PASSWORD
import fiftyone as fo
import fiftyone.brain as fob
import os
import cv2
import datetime
import numpy as np
import fiftyone.utils.splits as fous

In [2]:
!export FIFTYONE_CVAT_USERNAME=FIFTYONE_CVAT_USERNAME
!export FIFTYONE_CVAT_PASSWORD=FIFTYONE_CVAT_PASSWORD

In [3]:
dataset_dir = os.path.join(DATA_DIR, 'rock')
img_dir = os.path.join(dataset_dir, 'images')
name = 'rock'

In [4]:
if name in fo.list_datasets():
    fo.delete_dataset(name)

In [5]:
dataset = fo.Dataset.from_dir(
    dataset_dir=img_dir,
    dataset_type=fo.types.ImageDirectory,
    name=name,
)

 100% |███████████████| 1907/1907 [336.0ms elapsed, 0s remaining, 5.7K samples/s]      


In [6]:
dataset.persistent = True
# results = fob.compute_similarity(dataset, brain_key="img_sim")

In [12]:
sample = dataset.take(1).first()

In [13]:
sample

<SampleView: {
    'id': '620178534907a0f15aa61fe7',
    'media_type': 'image',
    'filepath': '/home/tlancaster6/PycharmProjects/mbuna/datasets/rock/images/rockBHVE_t017_LF_012522_0005_vid_15_27000_00-15-00.00.jpg',
    'tags': BaseList([]),
    'metadata': None,
    'pid': 'rockBHVE_t017_LF_012522',
    'start_date': '012522',
    'vid': '0005_vid',
    'step': 15,
    'frame': 27000,
    'time': '00-15-00',
}>

In [11]:
for sample in dataset:
    fname_split = sample.filepath.split('/')[-1].split('_')
    sample['pid'] = '_'.join(fname_split[:4])
    sample['start_date'] = fname_split[3]
    sample['vid'] = '_'.join(fname_split[4:6])
    sample['step'] = int(fname_split[6])
    sample['frame'] = int(fname_split[7])
    sample['time'] = fname_split[8].split('.')[0]
    sample.save()


In [34]:
results = fob.compute_visualization(
    dataset,
    num_dims=2,
    brain_key="image_embeddings",
    verbose=True,
    seed=51,
)
plot = results.visualize(labels="pid")
plot.show(height=720)

Computing embeddings...
 100% |███████████████| 1907/1907 [8.9m elapsed, 0s remaining, 3.9 samples/s]      
Generating visualization...
UMAP(random_state=51, verbose=True)
Thu Feb  3 13:40:47 2022 Construct fuzzy simplicial set
Thu Feb  3 13:40:48 2022 Finding Nearest Neighbors
Thu Feb  3 13:40:48 2022 Finished Nearest Neighbor Search
Thu Feb  3 13:40:48 2022 Construct embedding


Epochs completed:   0%|            0/500 [00:00]

Thu Feb  3 13:40:53 2022 Finished embedding






FigureWidget({
    'data': [{'customdata': array(['61fbf27b263fcabef29a0a45', '61fbf27b263fcabef29a0a46',
    …

In [31]:
vis_results = fob.compute_visualization(dataset, brain_key="img_vis")
for n in [50, 100, 250, 500, 1000]:
    results.find_unique(n)
    plot = results.visualize_unique(visualization=vis_results)
    view = dataset.select(results.unique_ids)
    for sample in view:
        sample.tags.append(f'top_{n}_unique')
        sample.save()
    plot = results.visualize_unique(visualization=vis_results)
    plot.show()

Computing embeddings...
 100% |███████████████| 1907/1907 [8.4m elapsed, 0s remaining, 4.1 samples/s]      
Generating visualization...
UMAP( verbose=True)
Thu Feb  3 11:02:15 2022 Construct fuzzy simplicial set
Thu Feb  3 11:02:17 2022 Finding Nearest Neighbors
Thu Feb  3 11:02:19 2022 Finished Nearest Neighbor Search
Thu Feb  3 11:02:21 2022 Construct embedding


Epochs completed:   0%|            0/500 [00:00]

Thu Feb  3 11:02:25 2022 Finished embedding
Generating index...
Index complete
Computing unique samples...
threshold: 1.000000, kept: 960, target: 50
threshold: 2.000000, kept: 81, target: 50
threshold: 4.000000, kept: 15, target: 50
threshold: 3.000000, kept: 39, target: 50
threshold: 2.500000, kept: 54, target: 50
threshold: 2.750000, kept: 46, target: 50
threshold: 2.625000, kept: 50, target: 50
Uniqueness computation complete






FigureWidget({
    'data': [{'customdata': array(['61fbf27b263fcabef29a0a47', '61fbf27b263fcabef29a0a48',
    …

Computing unique samples...
threshold: 1.000000, kept: 960, target: 100
threshold: 2.000000, kept: 81, target: 100
threshold: 1.500000, kept: 184, target: 100
threshold: 1.750000, kept: 107, target: 100
threshold: 1.875000, kept: 94, target: 100
threshold: 1.812500, kept: 98, target: 100
threshold: 1.781250, kept: 102, target: 100
threshold: 1.796875, kept: 101, target: 100
threshold: 1.804688, kept: 99, target: 100
threshold: 1.800781, kept: 101, target: 100
threshold: 1.802734, kept: 100, target: 100
Uniqueness computation complete






FigureWidget({
    'data': [{'customdata': array(['61fbf27b263fcabef29a0a47', '61fbf27b263fcabef29a0a48',
    …

Computing unique samples...
threshold: 1.000000, kept: 960, target: 250
threshold: 2.000000, kept: 81, target: 250
threshold: 1.500000, kept: 184, target: 250
threshold: 1.250000, kept: 370, target: 250
threshold: 1.375000, kept: 249, target: 250
threshold: 1.312500, kept: 301, target: 250
threshold: 1.343750, kept: 269, target: 250
threshold: 1.359375, kept: 258, target: 250
threshold: 1.367188, kept: 256, target: 250
threshold: 1.371094, kept: 251, target: 250
threshold: 1.373047, kept: 251, target: 250
threshold: 1.374023, kept: 249, target: 250
threshold: 1.373535, kept: 250, target: 250
Uniqueness computation complete






FigureWidget({
    'data': [{'customdata': array(['61fbf27b263fcabef29a0a47', '61fbf27b263fcabef29a0a48',
    …

Computing unique samples...
threshold: 1.000000, kept: 960, target: 500
threshold: 2.000000, kept: 81, target: 500
threshold: 1.500000, kept: 184, target: 500
threshold: 1.250000, kept: 370, target: 500
threshold: 1.125000, kept: 590, target: 500
threshold: 1.187500, kept: 472, target: 500
threshold: 1.156250, kept: 519, target: 500
threshold: 1.171875, kept: 492, target: 500
threshold: 1.164062, kept: 511, target: 500
threshold: 1.167969, kept: 500, target: 500
Uniqueness computation complete






FigureWidget({
    'data': [{'customdata': array(['61fbf27b263fcabef29a0a47', '61fbf27b263fcabef29a0a48',
    …

Computing unique samples...
threshold: 1.000000, kept: 960, target: 1000
threshold: 0.500000, kept: 1907, target: 1000
threshold: 0.750000, kept: 1792, target: 1000
threshold: 0.875000, kept: 1435, target: 1000
threshold: 0.937500, kept: 1188, target: 1000
threshold: 0.968750, kept: 1071, target: 1000
threshold: 0.984375, kept: 1014, target: 1000
threshold: 0.992188, kept: 989, target: 1000
threshold: 0.988281, kept: 998, target: 1000
threshold: 0.986328, kept: 1007, target: 1000
threshold: 0.987305, kept: 1002, target: 1000
threshold: 0.987793, kept: 999, target: 1000
threshold: 0.987549, kept: 1001, target: 1000
threshold: 0.987671, kept: 1001, target: 1000
threshold: 0.987732, kept: 999, target: 1000
threshold: 0.987701, kept: 999, target: 1000
threshold: 0.987686, kept: 999, target: 1000
threshold: 0.987679, kept: 999, target: 1000
threshold: 0.987675, kept: 999, target: 1000
threshold: 0.987673, kept: 1001, target: 1000
threshold: 0.987674, kept: 1000, target: 1000
Uniqueness comp





FigureWidget({
    'data': [{'customdata': array(['61fbf27b263fcabef29a0a47', '61fbf27b263fcabef29a0a48',
    …

In [14]:
view = dataset.take(100)
session = fo.launch_app(view)

In [15]:
anno_key = 'practice_anno'
anno_results = view.annotate(
    anno_key=anno_key,
    label_type='detections',
    label_field='ground_truth',
    classes=['fish', 'pipe'],
    launch_editor=True
)


Please enter your login credentials.
You can avoid this in the future by setting your `FIFTYONE_CVAT_USERNAME` and `FIFTYONE_CVAT_PASSWORD` environment variables
Username: tlancaster6
Password: ········
Uploading samples to CVAT...
Computing image metadata...
 100% |█████████████████| 100/100 [844.0ms elapsed, 0s remaining, 118.5 samples/s]      
Upload complete
Please enter your login credentials.
You can avoid this in the future by setting your `FIFTYONE_CVAT_USERNAME` and `FIFTYONE_CVAT_PASSWORD` environment variables
Username: tlancaster6
Password: ········
Launching editor at 'https://cvat.org/tasks/196049/jobs/283169'...


In [16]:
dataset.list_annotation_runs()

['practice_anno']

In [17]:
dataset.load_annotations(anno_key)

Please enter your login credentials.
You can avoid this in the future by setting your `FIFTYONE_CVAT_USERNAME` and `FIFTYONE_CVAT_PASSWORD` environment variables
Username: tlancaster6
Password: ········
Downloading labels from CVAT...
Download complete
Loading labels for field 'ground_truth'...
 100% |█████████████████████| 4/4 [17.5ms elapsed, 0s remaining, 229.2 samples/s] 


In [18]:
dataset.save()

In [19]:
dataset.first()

<Sample: {
    'id': '620178534907a0f15aa618bf',
    'media_type': 'image',
    'filepath': '/home/tlancaster6/PycharmProjects/mbuna/datasets/rock/images/rockBHVE_t003_MZalbino_012522_0001_vid_15_0_00-00-00.00.jpg',
    'tags': BaseList([]),
    'metadata': None,
    'pid': 'rockBHVE_t003_MZalbino_012522',
    'start_date': '012522',
    'vid': '0001_vid',
    'step': 15,
    'frame': 0,
    'time': '00-00-00',
    'ground_truth': None,
}>

In [56]:
fous.random_split(dataset, {"train": 0.7, "val": 0.2, 'test': 0.1})

In [61]:
train_view = dataset.match_tags.match_tags('train')
val_view = dataset.match_tags.match_tags('val')
train_view.export(os.path.join(DATA_DIR, 'practice_anno'), 
           dataset_type=fo.types.YOLOv5Dataset,
           label_field='ground_truth', split='train')
val_view.export(os.path.join(DATA_DIR, 'practice_anno'), 
           dataset_type=fo.types.YOLOv5Dataset,
           label_field='ground_truth', split='val')

 100% |███████████████████| 75/75 [148.8ms elapsed, 0s remaining, 503.9 samples/s]     
Directory '/home/tlancaster6/PycharmProjects/mbuna/datasets/practice_anno' already exists; export will be merged with existing files
 100% |███████████████████| 25/25 [53.6ms elapsed, 0s remaining, 466.4 samples/s] 
