# Dataset analysis

This notebook shows how to load dataset metadata, source media, and annotation data from DIVE Web through its REST API.

In [None]:
# Set GIRDER_API_KEY in a .env file; the dotenv extension loaded below reads it into the environment.
%load_ext dotenv
%dotenv

import os
from pprint import pprint
from girder_client import GirderClient
from dive_utils import fromMeta

# Read the API key from the environment; 'CHANGEME' is only a placeholder
# default, so set GIRDER_API_KEY to a real key before running this cell.
GIRDER_API_KEY = os.getenv('GIRDER_API_KEY', 'CHANGEME')
# Client bound to the REST API root at viame.kitware.com.
diveclient = GirderClient(apiUrl='https://viame.kitware.com/api/v1')
token = diveclient.authenticate(apiKey=GIRDER_API_KEY)
# Plain string: there is nothing to interpolate, so no f-prefix is needed.
print('Authenticated!')

In [None]:
# Fetch the dataset's folder record, then extract its name, type and fps.
DATASET_ID = "5e4ebcaa78ed364cd0f5e3b9"  # Replace this with your own ID

dataset_info = diveclient.getFolder(DATASET_ID)
dataset_name = dataset_info['name']
# 'type' and 'fps' are both read from the folder record through fromMeta.
dataset_type, dataset_fps = (
    fromMeta(dataset_info, meta_key) for meta_key in ('type', 'fps')
)

print(f'Loaded dataset: {dataset_name}\n')
pprint(dataset_info)

## Load dataset source media

In [None]:
# The endpoint used to list source media depends on the dataset type.
if dataset_type == 'image-sequence':
    # Image sequences: query the valid images and keep only each item's name.
    dataset_media = diveclient.get(
        'viame/valid_images',
        parameters={'folderId': DATASET_ID},
    )
    dataset_media = [item['name'] for item in dataset_media]
else:
    # Any other dataset type: keep the clip metadata response as returned.
    dataset_media = diveclient.get(
        'viame_detection/clip_meta',
        parameters={'folderId': DATASET_ID},
    )

pprint(dataset_media)

## Load annotations and summarize contents

In [None]:
from dive_tasks import summary

# Fetch the annotations for the dataset folder as parsed JSON.
dataset_annotations_json = diveclient.get(
    'viame_detection',
    parameters={'folderId': DATASET_ID},
)

# summarize_annotations receives this dict and presumably fills it in place
# (its values are read back below) -- TODO confirm against dive_tasks.summary.
all_summary = {}
summary.summarize_annotations(DATASET_ID, dataset_annotations_json, all_summary)
# Use print, not pprint, for the heading: pprint would emit the string's
# quoted repr instead of the plain text.
print(f'Summary of {DATASET_ID}')
pprint([s.dict() for s in all_summary.values()])

## Load detections as CSV

In [None]:
# Request the detections export with jsonResp=False so the response object
# itself is kept, then print its text body.
dataset_annotations_csv = diveclient.get(
    f'viame_detection/{DATASET_ID}/export_detections',
    jsonResp=False,
)
print(dataset_annotations_csv.text)