In [None]:
import os
import glob
import shutil
from tqdm import tqdm
from datetime import datetime

import numpy as np
import pandas as pd

import tator
import panoptes_client

In [None]:
def get_now():
    """Return the current local time formatted as ``YYYY-MM-DD_HH-MM-SS``."""
    timestamp_format = "%Y-%m-%d_%H-%M-%S"
    current_time = datetime.now()
    return current_time.strftime(timestamp_format)

### Configs

In [None]:
# Zooniverse credentials are read from the environment; os.getenv returns
# None for any variable that is not set.
username = os.getenv('ZOONIVERSE_USERNAME')
password = os.getenv('ZOONIVERSE_PASSWORD')

zoon_project_id = 21853

# Fail early with an explicit message: passing missing credentials on to the
# client can otherwise end in a misleading "successful for None" note or an
# unrelated error further down.
if not username or not password:
    raise Exception(
        "ERROR: ZOONIVERSE_USERNAME and ZOONIVERSE_PASSWORD must both be set "
        "in the environment before running this notebook"
    )

try:
    # Login to panoptes using username and password
    panoptes_client.Panoptes.connect(username=username, password=password)
    print(f"NOTE: Authentication to Zooniverse successful for {username}")
except Exception as e:
    # Chain the original exception so its traceback is not lost
    raise Exception(f"ERROR: Could not login to Panoptes for {username}\n{e}") from e

try:
    # Get access to the Zooniverse project given the provided credentials
    project = panoptes_client.Project.find(id=zoon_project_id)
    print(f"NOTE: Connected to Zooniverse project '{project.title}' successfully")
except Exception as e:
    raise Exception(f"ERROR: Could not access project {zoon_project_id}.\n{e}") from e

In [None]:
# TATOR access token (from the environment) and the numeric project to work on
token = os.getenv('TATOR_TOKEN')
project_id = 70

try:
    # Build the TATOR API client against the cloud host
    api = tator.get_api(host='https://cloud.tator.io', token=token)
    # Project and state-type IDs; presumably consumed by later cells
    tator_project_id = project_id
    state_type_id = 288  # State Type (ROV)
    # Look up the user behind the token and report who is logged in
    tator_user = api.whoami().username
    print(f"NOTE: Authentication to TATOR successful for {tator_user}")
except Exception as err:
    raise Exception(f"ERROR: Could not obtain needed information from TATOR.\n{err}")

### Get Reduced Season N Dataframe

In [None]:
import os

# Absolute path of the combined ("season_n") classification CSV
csv_path = os.path.abspath(
    "../data/classification_csv/click-a-coral-classifications_season_n.csv"
)

In [None]:
# Guard against stale notebook state. Every output path below is derived by
# replacing "season_n" in csv_path. If csv_path has been rebound to a
# per-season file (a later cell does exactly that), the replace() calls become
# no-ops and each to_csv() would overwrite the file that was just read.
if "season_n" not in csv_path:
    raise Exception(
        f"ERROR: Expected the combined 'season_n' CSV but csv_path is {csv_path}. "
        "Re-run the cell that defines csv_path before splitting the seasons."
    )

# Read the CSV file
df = pd.read_csv(csv_path)

# Remove the private, ground truth workflow
df = df[df['workflow_id'] != 26984]

# Save Season 1 (workflow 25828, version 355.143)
season_1_df = df[(df['workflow_id'] == 25828) & (df['workflow_version'] == 355.143)]
season_1_df.to_csv(csv_path.replace("season_n", "season_1"), index=False)
# Remove Season 1: every row of workflow 25828 is dropped, whatever its version
df = df[df['workflow_id'] != 25828]

# Save Season 2 (workflow 26428, version 16.18)
season_2_df = df[(df['workflow_id'] == 26428) & (df['workflow_version'] == 16.18)]
season_2_df.to_csv(csv_path.replace("season_n", "season_2"), index=False)
# Remove Season 2: keep only rows whose version is newer than 16.18
df = df[df['workflow_version'] > 16.18]

# Save Season 3 (workflow 26428, version 48.28)
season_3_df = df[(df['workflow_id'] == 26428) & (df['workflow_version'] == 48.28)]
season_3_df.to_csv(csv_path.replace("season_n", "season_3"), index=False)

In [None]:
from cac.from_zooniverse import ZooniverseProcessor

# Workflow / version pair selecting the season to process
workflow_id = 25828
version = 355.143

# Absolute output folder for this season; created when it does not exist yet
output_dir = os.path.abspath("../data/reduced/Season_1")
os.makedirs(output_dir, exist_ok=True)

# Switch csv_path from the combined file name to the season_1 file name
csv_path = csv_path.replace("season_n", "season_1")

In [None]:
# Build the processor from the season CSV, output folder, workflow ID and version
processor = ZooniverseProcessor(
    csv_path,
    output_dir,
    workflow_id,
    version,
)

# clean_csv_file() yields two values: the first is kept as df (indexed by
# column name further down), the second as path
df, path = processor.clean_csv_file()

### Move Zipped Curated to Reduced Season Folder

In [None]:
# Distinct values of the 'Media ID' column, converted to a list of strings
unique_media = df['Media ID'].unique()
media_ids = unique_media.astype(str).tolist()

In [None]:
curated_path = os.path.abspath("../data/curated")

# Stop at the first media ID that has no "<media_id>.zip" in the curated folder
for media_id in media_ids:
    zip_path = os.path.join(curated_path, f"{media_id}.zip")
    if os.path.exists(zip_path):
        continue
    raise Exception(f"ERROR: Could not find zip file for media {media_id} at {zip_path}.")

In [None]:
# Unpack into the "media" folder of the season configured in output_dir, so the
# extracted frames always sit next to that season's reduced data instead of a
# hard-coded season folder that can disagree with the rest of the notebook.
temp_path = os.path.join(output_dir, "media")
os.makedirs(temp_path, exist_ok=True)

for media_id in tqdm(media_ids, desc="Unzipping media files"):
    # Source archive and per-media destination folder
    zip_path = os.path.join(curated_path, f"{media_id}.zip")
    dst_path = os.path.join(temp_path, media_id)

    # A missing archive aborts the whole run
    if not os.path.exists(zip_path):
        raise Exception(f"ERROR: Could not find zip file for media {media_id} at {zip_path}.")

    # An existing destination folder is treated as already unpacked
    if os.path.exists(dst_path):
        print(f"NOTE: Directory already exists for {media_id}, skipping unzip")
        continue

    # Create destination directory
    os.makedirs(dst_path, exist_ok=True)

    try:
        # Extract directly to the media_id subfolder
        print(f"NOTE: Unzipping {zip_path} to {dst_path}")
        shutil.unpack_archive(zip_path, dst_path, 'zip')

        # If the archive carried its own top-level "<media_id>" folder, lift
        # its contents up into dst_path and drop the now-empty folder
        nested_dir = os.path.join(dst_path, media_id)
        if os.path.isdir(nested_dir):
            for item in os.listdir(nested_dir):
                shutil.move(os.path.join(nested_dir, item), dst_path)
            os.rmdir(nested_dir)
    except Exception as e:
        print(f"WARNING: Issue with unpacking {media_id}: {str(e)}")
        # Remove the partial folder. Left in place, the "already exists" check
        # above would skip this media on every later run and the failed
        # extraction would never be retried.
        shutil.rmtree(dst_path, ignore_errors=True)
        continue

    # Report (without aborting) when the expected contents are missing
    frames_dir = os.path.join(dst_path, "frames")
    frames_csv = os.path.join(dst_path, "frames.csv")

    if not os.path.exists(frames_dir):
        print(f"ERROR: Could not find frames directory for media {media_id} at {frames_dir}.")

    if not os.path.exists(frames_csv):
        print(f"ERROR: Could not find frames.csv for media {media_id} at {frames_csv}.")
