In [1]:
import os
import glob
import shutil
from tqdm import tqdm
from datetime import datetime

import numpy as np
import pandas as pd

import tator
import panoptes_client

In [2]:
def get_now():
    """Return the current local time as a ``YYYY-MM-DD_HH-MM-SS`` string.

    The format contains only digits, dashes and one underscore, so the result
    can be embedded directly in a file name.
    """
    timestamp_format = "%Y-%m-%d_%H-%M-%S"
    current_time = datetime.now()
    return current_time.strftime(timestamp_format)

### Configs

In [3]:
# Zooniverse credentials are read from the environment so they never appear in the notebook
username = os.getenv('ZOONIVERSE_USERNAME')
password = os.getenv('ZOONIVERSE_PASSWORD')

# Numeric ID of the Zooniverse project this notebook works against
zoon_project_id = 21853

# os.getenv returns None for an unset variable; stop here with a clear message
# instead of reporting a "successful" authentication for user None.
# NOTE(review): the Panoptes client may defer the real login until the first
# request, so a missing credential might not fail inside connect() — confirm.
if not username or not password:
    raise Exception("ERROR: ZOONIVERSE_USERNAME and ZOONIVERSE_PASSWORD must both be set "
                    "in the environment before connecting to Panoptes")

try:
    # Login to panoptes using username and password
    panoptes_client.Panoptes.connect(username=username, password=password)
    print(f"NOTE: Authentication to Zooniverse successful for {username}")
except Exception as e:
    # Re-raise with context; `from e` keeps the original traceback attached
    raise Exception(f"ERROR: Could not login to Panoptes for {username}\n{e}") from e

try:
    # Get access to the Zooniverse project given the provided credentials
    project = panoptes_client.Project.find(id=zoon_project_id)
    print(f"NOTE: Connected to Zooniverse project '{project.title}' successfully")
except Exception as e:
    raise Exception(f"ERROR: Could not access project {zoon_project_id}.\n{e}") from e

NOTE: Authentication to Zooniverse successful for Jordan-Pierce
NOTE: Connected to Zooniverse project 'Click-a-Coral' successfully


In [4]:
# The TATOR access token is read from the environment; 70 is the TATOR project ID
token = os.getenv('TATOR_TOKEN')
project_id = 70

try:
    # Build the API client for the hosted TATOR instance using the token
    api = tator.get_api(host='https://cloud.tator.io', token=token)
    # IDs used by later queries: the project itself and its State Type (ROV)
    tator_project_id, state_type_id = project_id, 288
    # whoami() is called to obtain the account name shown in the confirmation message
    tator_user = api.whoami().username
    print(f"NOTE: Authentication to TATOR successful for {tator_user}")
except Exception as err:
    raise Exception(f"ERROR: Could not obtain needed information from TATOR.\n{err}")

NOTE: Authentication to TATOR successful for jordan.pierce


### Make Curated Dataframe

In [None]:
# Find all of the curated data: every .zip archive directly inside ../data/curated/
curated_dir = os.path.abspath('../data/curated/')
curated_archives = glob.glob(os.path.join(curated_dir, "*.zip"))

# Each entry is the archive's file name up to its first "."; directories that
# happen to end in .zip are skipped. glob returns paths in arbitrary order, so
# the list is sorted to keep everything derived from it reproducible across runs.
curated_media = sorted(
    os.path.basename(path).split(".")[0]
    for path in curated_archives
    if os.path.isfile(path)
)

print(f"Found {len(curated_media)} curated media files")

218

In [80]:
# Extract metadata for each of the curated media files
curated_metadata = []

# TATOR media attribute name -> column name in the output table
MEDIA_ATTRIBUTE_COLUMNS = {
    "Camera": "camera",
    "CruiseID": "cruise_id",
    "DiveID": "dive_id",
    "Original Filename": "original_filename",
    "VideoPart": "video_part",
}

# TATOR navigation-state attribute name -> column prefix in the output table
NAV_ATTRIBUTE_COLUMNS = {
    "Latitude": "lat",
    "Longitude": "lon",
    "DepthInMeters": "depth",
}

# Column suffixes for the three sampled positions: start (0), middle (1), end (2)
NAV_SUFFIXES = ("0", "1", "2")


def _get_nav_state(media_id, item, **paging):
    """Fetch one navigation state for a media, or None when it is unavailable.

    Args:
        media_id: ID of the media whose states are queried.
        item: Index into the returned list (0 for the first state, -1 for the last).
        **paging: `start` / `stop` arguments forwarded to `api.get_state_list`.

    Returns:
        The selected state object, or None if the request failed or returned
        nothing. Failures are reported with `tqdm.write` rather than raised, so
        one missing navigation sample never aborts the whole extraction.
    """
    try:
        states = api.get_state_list(project=tator_project_id, media_id=[media_id],
                                    type=state_type_id, **paging)
    except Exception as e:
        tqdm.write(f"WARNING: Could not get navigation for media {media_id} with {paging}\n{e}")
        return None

    if not states:
        tqdm.write(f"WARNING: No navigation found for media {media_id} with {paging}")
        return None

    return states[item]


def _nav_columns(nav, suffix):
    """Map a navigation state's attributes to `<prefix>_<suffix>` columns.

    Only attributes that are actually present are returned; a missing state or a
    state without attributes yields an empty dict.
    """
    nav_attributes = getattr(nav, "attributes", None) or {}
    return {
        f"{column}_{suffix}": nav_attributes[attribute]
        for attribute, column in NAV_ATTRIBUTE_COLUMNS.items()
        if attribute in nav_attributes
    }


for media_id in tqdm(curated_media, desc="Extracting metadata from curated media"):
    # Get the media object from TATOR
    media = api.get_media(media_id)

    fps = media.fps
    num_frames = media.num_frames
    # `attributes` may be absent or None; treat both as "no attributes"
    media_attributes = getattr(media, "attributes", None) or {}

    metadata = {
        "media_id": media_id,
        "name": media.name,
        "fps": fps,
        "height": media.height,
        "width": media.width,
        "num_frames": num_frames,
        "created_datetime": media.created_datetime,
    }

    # Optional media attributes default to an empty string when not set
    for attribute, column in MEDIA_ATTRIBUTE_COLUMNS.items():
        metadata[column] = media_attributes.get(attribute, "")

    # Pre-create every navigation column so each record has the same keys (and
    # the resulting table the same column order) even when a lookup fails
    for suffix in NAV_SUFFIXES:
        for column in NAV_ATTRIBUTE_COLUMNS.values():
            metadata[f"{column}_{suffix}"] = None

    # Positions to sample, as (column suffix, item to pick, paging arguments).
    # NOTE(review): `start` / `stop` are passed to get_state_list as list bounds
    # and are derived from the duration in seconds (num_frames // fps); this
    # presumably relies on about one navigation state per second — confirm.
    nav_requests = [(NAV_SUFFIXES[0], 0, {"start": 0, "stop": 1})]

    if fps and num_frames:
        mid_index = int(num_frames // fps // 2)
        # Clamp at 0: for clips shorter (in seconds) than their fps value the
        # subtraction would otherwise produce a negative start
        end_index = max(0, int(num_frames // fps) - int(fps))
        nav_requests.append((NAV_SUFFIXES[1], 0, {"start": mid_index, "stop": mid_index + 1}))
        nav_requests.append((NAV_SUFFIXES[2], -1, {"start": end_index}))
    else:
        # Without fps / num_frames the middle and end positions cannot be computed
        tqdm.write(f"WARNING: Media {media_id} has no fps or num_frames; "
                   f"skipping mid and end navigation")

    # Each position is looked up independently so one failure does not discard the others
    for suffix, item, paging in nav_requests:
        nav = _get_nav_state(media_id, item, **paging)
        metadata.update(_nav_columns(nav, suffix))

    curated_metadata.append(metadata)

        

Extracting metadata from curated media: 100%|██████████| 218/218 [06:58<00:00,  1.92s/it]


In [None]:
# Turn the collected records into a table and write it to a timestamped CSV
# inside the data directory (no index column, header row included, overwrite mode)
curated_df = pd.DataFrame(curated_metadata)
curated_csv_name = f"curated_media_w_nav-{get_now()}.csv"
curated_csv_path = os.path.join(os.path.abspath("../data/"), curated_csv_name)
curated_df.to_csv(curated_csv_path, index=False, header=True, mode="w")