In [None]:
import os
import glob
import shutil
from tqdm import tqdm
from datetime import datetime

import numpy as np
import pandas as pd

import tator
import panoptes_client

In [None]:
def upload_media(client, project, media_id, dataframe, set_active):
    """
    Upload the frames listed in `dataframe` to Zooniverse as a new subject set.

    A subject set named after `media_id` is created and linked to `project`;
    one subject is created per unique value in the dataframe's 'Path' column,
    with the remaining columns of that row attached as subject metadata.
    If several rows share the same 'Path', only one subject is created for
    that path (the metadata of the last such row wins) and every one of those
    rows receives the same subject ID.

    :param client: object exposing `SubjectSet`, `Subject` and `Workflow`
        constructors (the notebook passes the `panoptes_client` module)
    :param project: Zooniverse project the subject set is linked to
    :param media_id: identifier used (as a string) for the subject set's display name
    :param dataframe: pandas DataFrame with a 'Path' column holding the frame locations;
        it is modified in place (a 'Subject_ID' column is added)
    :param set_active: when truthy, attach the new subject set to every
        active workflow of the project
    :return: the same dataframe, now containing a 'Subject_ID' column
    :raises Exception: if creating the subject set / subjects fails, or if
        linking to the active workflows fails (original error is chained)
    """
    try:
        # Create subject set, link to project
        subject_set = client.SubjectSet()
        subject_set.links.project = project
        subject_set.display_name = str(media_id)
        subject_set.save()

        # Reload the project
        project.reload()

        # Convert the dataframe (frame paths) to a list of row dicts
        subject_dict = dataframe.to_dict(orient='records')
        # Create a new dictionary with 'Path' as keys and the other columns as metadata
        subject_meta = {d['Path']: {k: v for k, v in d.items() if k != 'Path'} for d in subject_dict}

        # Subjects created so far, and the ID assigned to each path
        subjects = []
        path_to_subject_id = {}

        # Loop through each of the frames and convert to a subject
        for filename, metadata in tqdm(subject_meta.items()):
            # Create the subject
            subject = client.Subject()
            # Link subject to project
            subject.links.project = project
            subject.add_location(filename)
            # Update meta
            subject.metadata.update(metadata)
            # Save (the ID is read only after saving)
            subject.save()
            # Record
            subjects.append(subject)
            path_to_subject_id[filename] = subject.id

        # Add the list of subjects to set
        subject_set.add(subjects)
        # Save the subject set
        subject_set.save()
        project.save()

    except Exception as e:
        # Chain the original error so its traceback is not lost
        raise Exception(f"ERROR: Could not finish uploading subject set for {media_id} to Zooniverse.\n{e}") from e

    if set_active:

        try:
            # Attaching the new subject set to all the active workflows
            workflow_ids = project.raw['links']['active_workflows']

            # If there are active workflows, link them to the new subject set
            for workflow_id in tqdm(workflow_ids):
                # Create Workflow object
                workflow = client.Workflow(workflow_id)
                # Use normal attribute access rather than reading the raw dict:
                # NOTE(review): a Workflow built from just an ID appears to be
                # populated lazily on attribute access - confirm against panoptes_client.
                workflow_name = workflow.display_name
                # Add the subject set created previously
                print(f"\nNOTE: Adding subject set {subject_set.display_name} to workflow {workflow_name}")
                workflow.add_subject_sets([subject_set])
                # Save
                workflow.save()
                project.save()

        except Exception as e:
            raise Exception(f"ERROR: Could not link media {media_id} to project workflows.\n{e}") from e

    # Update the dataframe to now contain the subject IDs
    # This is needed when downloading annotations later.
    # Mapping through 'Path' keeps the column aligned with the rows even when
    # several rows share one path (a plain list would have the wrong length).
    dataframe['Subject_ID'] = dataframe['Path'].map(path_to_subject_id)

    return dataframe

#### Credentials

In [None]:
# TATOR settings: the project to work in and the API token taken from the environment
project_id = 70
token = os.getenv('TATOR_TOKEN')

try:
    # Build the TATOR API client for the cloud host using the token
    api = tator.get_api(host='https://cloud.tator.io', token=token)
    # Identifiers used for this project
    tator_project_id = project_id
    state_type_id = 288  # State Type (ROV)
    # whoami() is the call that actually confirms the token is accepted
    print(f"NOTE: Authentication to TATOR successful for {api.whoami().username}")
except Exception as e:
    raise Exception(f"ERROR: Could not obtain needed information from TATOR.\n{e}")

In [None]:
# Zooniverse project to upload to
zoon_project_id = 21853

# Zooniverse credentials are read from the environment
username = os.getenv('ZOONIVERSE_USERNAME')
password = os.getenv('ZOONIVERSE_PASSWORD')

# Step 1: authenticate against Panoptes
try:
    panoptes_client.Panoptes.connect(username=username, password=password)
    print(f"NOTE: Authentication to Zooniverse successful for {username}")
except Exception as e:
    raise Exception(f"ERROR: Could not login to Panoptes for {username}\n{e}")

# Step 2: look up the project with the authenticated session
try:
    project = panoptes_client.Project.find(id=zoon_project_id)
    print(f"NOTE: Connected to Zooniverse project '{project.title}' successfully")
except Exception as e:
    raise Exception(f"ERROR: Could not access project {zoon_project_id}.\n{e}")

#### Needs Uploading

In [None]:
# IDs of the media still to be uploaded (presumably TATOR media IDs - confirm).
# They are kept as strings because the cells below use them to build file
# names and paths: `<id>.zip` under the curated directory and `<id>/` under
# the temp directory.
media_ids = [
    '13773236',
    '13849318',
    '13817345',
    '14399481',
    '17093804',
    '14405503',
    '13756556',
    '17093860',
    '13759791',
    '13754339',
    '17093861',
    '17093864',
    '13723028',
    '13849916',
    '13849706',
    '14405003',
    '13759171',
    '14409485',
    '13757674',
    '13800889',
    '13849803',
    '17093852',
    '13753671',
    '17093859',
    '17093853',
    '17093842',
    '13849372',
    '14406510',
    '17093839',
    '14403746',
    '13759653',
    '13849299',
    '14411239',
    '13849343',
    '13849727',
    '13808161',
    '17093740',
    '17093879',
    '14409735',
    '13760111',
    '13773515',
    '17093892',
    '13753944',
    '14402741',
    '13725519',
    '14393472',
    '14388418',
    '13849672',
    '13723943',
    '14407606',
    '14411903',
    '14405002',
    '13760355',
    '17093826',
    '14406760',
    '13798701',
    '17093847',
    '13773086',
    '17093870',
    '13772881',
    '13849797',
    '14405754',
    '13802769',
    '14403247',
    '13849687',
    '14404072',
    '17093770',
    '17093766',
    '13849370',
    '17093817',
    '17093857',
    '13775472',
    '14407856',
    '17093755',
    '14404702',
    '13798999',
    '17093831',
    '13760124',
    '14398477',
    '13817627',
    '17093814',
    '13722070',
    '14408480',
    '14408730',
    '13808199',
    '13754680',
    '13800998',
    '17093793',
    '14411489',
    '13751248',
    '13849670',
    '14410740',
    '13849222',
    '14406305',
    '14397224',
    '17093858',
    '13849720',
    '14410349',
    '17093760',
    '13757903',
    '14401993',
    '14404451',
    '13849697',
    '14406259',
    '13754287',
    '11113559',
]

#### Prepare

In [None]:
# Directory holding one curated `<media_id>.zip` archive per media
curated_path = os.path.abspath("../data/curated")

# Fail fast: stop at the first media whose archive is missing
for media_id in media_ids:
    zip_path = os.path.join(curated_path, f"{media_id}.zip")
    if os.path.exists(zip_path):
        continue
    raise Exception(f"ERROR: Could not find zip file for media {media_id} at {zip_path}.")

In [None]:
# Working directory that receives one extracted folder per media
temp_path = os.path.abspath("../data/temp")
os.makedirs(temp_path, exist_ok=True)

for media_id in media_ids:
    # Archive to extract and the directory its contents are expected in
    zip_path = os.path.join(curated_path, f"{media_id}.zip")
    dst_path = os.path.join(temp_path, media_id)

    # The archive must exist
    if not os.path.exists(zip_path):
        raise Exception(f"ERROR: Could not find zip file for media {media_id} at {zip_path}.")

    # An existing destination is treated as "already extracted"
    if os.path.exists(dst_path):
        print(f"NOTE: Directory already exists for {media_id}, skipping unzip")
        continue

    # Create destination directory
    os.makedirs(dst_path, exist_ok=True)

    try:
        # Try to unpack the archive, handling empty zip files.
        # NOTE(review): extraction targets temp_path, not dst_path - this assumes
        # the archive contains a top-level `<media_id>/` folder; confirm.
        print(f"NOTE: Unzipping {zip_path} to {temp_path}")
        shutil.unpack_archive(zip_path, temp_path, 'zip')
    except Exception as e:
        print(f"WARNING: Issue with unpacking {media_id}: {str(e)}")
        # Remove the (empty or partial) destination; otherwise the
        # "already exists" check above would skip this media on every
        # later run and the failed extraction would never be retried.
        shutil.rmtree(dst_path, ignore_errors=True)
        continue

    # Check that the expected contents are present after extraction
    frames_dir = os.path.join(dst_path, "frames")
    frames_csv = os.path.join(dst_path, "frames.csv")

    if not os.path.exists(frames_dir):
        print(f"ERROR: Could not find frames directory for media {media_id} at {frames_dir}.")

    if not os.path.exists(frames_csv):
        print(f"ERROR: Could not find frames.csv for media {media_id} at {frames_csv}.")


In [None]:
for media_id in media_ids:
    # Locations of the extracted frames folder and its CSV index for this media
    dst_path = os.path.join(temp_path, media_id)
    frames_dir = os.path.join(dst_path, "frames")
    frames_csv = os.path.join(dst_path, "frames.csv")

    try:
        # Load the index, point every 'Path' entry at the local frames folder
        # (keeping only the file name), and write the index back in place
        df = pd.read_csv(frames_csv)
        df['Path'] = df['Path'].map(lambda name: os.path.join(frames_dir, os.path.basename(name)))
        df.to_csv(frames_csv, index=False)
    except Exception as e:
        # Report the problem and move on to the next media
        print(f"ERROR: Could not read CSV file for media {media_id} at {frames_csv}.\n{e}")

### Upload

In [None]:
for media_id in media_ids:
    # Get the path to the frames directory and csv file
    dst_path = os.path.join(temp_path, media_id)
    frames_dir = os.path.join(dst_path, "frames")
    frames_csv = os.path.join(dst_path, "frames.csv")

    if not os.path.exists(frames_csv) or not os.path.exists(frames_dir):
        print(f"ERROR: Missing frames directory or CSV file for media {media_id}.")
        continue

    # Upload the media to Zooniverse
    try:
        df = pd.read_csv(frames_csv)

        # A 'Subject_ID' column means this media was uploaded by an earlier
        # run; skip it so that re-running the cell does not create a second
        # subject set for the same media.
        if 'Subject_ID' in df.columns:
            print(f"NOTE: Media {media_id} already has subject IDs in {frames_csv}, skipping upload")
            continue

        # Keep the returned dataframe: it carries the subject IDs assigned
        # by Zooniverse, which are needed when downloading annotations later.
        df = upload_media(panoptes_client, project, media_id, df, set_active=True)
        df.to_csv(frames_csv, index=False)
    except Exception as e:
        print(f"ERROR: Could not upload media {media_id} to Zooniverse.\n{e}")
        continue