In [2]:
import os
import glob
import shutil
from tqdm import tqdm
from datetime import datetime

import numpy as np
import pandas as pd

import tator
import panoptes_client

In [3]:
def get_now():
    """Return the current local time as a 'YYYY-MM-DD_HH-MM-SS' string."""
    current = datetime.now()
    return f"{current:%Y-%m-%d_%H-%M-%S}"

### Configs

In [4]:
# Zooniverse credentials are read from the environment so they never live in the notebook
username = os.getenv('ZOONIVERSE_USERNAME')
password = os.getenv('ZOONIVERSE_PASSWORD')

zoon_project_id = 21853

# Fail fast when a credential is unset: os.getenv returns None in that case, and
# the success message below would otherwise report a login for "None".
if not username or not password:
    raise Exception("ERROR: ZOONIVERSE_USERNAME and ZOONIVERSE_PASSWORD must both be set in the environment")

try:
    # Login to panoptes using username and password
    panoptes_client.Panoptes.connect(username=username, password=password)
    print(f"NOTE: Authentication to Zooniverse successful for {username}")
except Exception as e:
    # Chain the original error so the full traceback stays available
    raise Exception(f"ERROR: Could not login to Panoptes for {username}\n{e}") from e

try:
    # Get access to the Zooniverse project given the provided credentials
    project = panoptes_client.Project.find(id=zoon_project_id)
    print(f"NOTE: Connected to Zooniverse project '{project.title}' successfully")
except Exception as e:
    raise Exception(f"ERROR: Could not access project {zoon_project_id}.\n{e}") from e

NOTE: Authentication to Zooniverse successful for Jordan-Pierce
NOTE: Connected to Zooniverse project 'Click-a-Coral' successfully


In [5]:
# TATOR API token is read from the environment so it never lives in the notebook
token = os.getenv('TATOR_TOKEN')
project_id = 70

# Fail fast with a clear message when the token is unset (os.getenv returns None),
# rather than surfacing whatever error the API call raises without it.
if not token:
    raise Exception("ERROR: TATOR_TOKEN must be set in the environment")

try:
    # Get the TATOR api given the provided token
    api = tator.get_api(host='https://cloud.tator.io', token=token)
    # Ids used for this project
    tator_project_id = project_id
    state_type_id = 288  # State Type (ROV)
    # whoami() goes through the API object, so the message reflects the token's owner
    print(f"NOTE: Authentication to TATOR successful for {api.whoami().username}")
except Exception as e:
    # Chain the original error so the full traceback stays available
    raise Exception(f"ERROR: Could not obtain needed information from TATOR.\n{e}") from e

NOTE: Authentication to TATOR successful for jordan.pierce


### Get Reduced Season N Dataframe

In [26]:
import os
from cac.from_zooniverse import clean_csv_file

# Workflow id and version handed to clean_csv_file
# NOTE(review): version is a float, so a value like 16.10 would collapse to 16.1 - confirm this is intended
workflow_id = 26428
version = 16.18

# Absolute path to the classification CSV export
csv_path = os.path.abspath("../data/classification_csv/click-a-coral-classifications_season_3.csv")

# Absolute path to the output folder, created if it does not exist yet
# NOTE(review): the CSV is named season_3 while this folder is Season_2 - confirm the pairing
output_dir = os.path.abspath("../data/reduced/Season_2")
os.makedirs(output_dir, exist_ok=True)

In [24]:
# Run the cleaning step on the classification CSV; it hands back two values,
# bound here to `df` and `path`
df, path = clean_csv_file(
    csv_path,
    output_dir,
    workflow_id,
    version,
)

Cleaning Data: 100%|██████████| 34083/34083 [00:36<00:00, 942.65it/s] 


### Unzip Curated Media into the Reduced Season Folder

In [28]:
# Distinct values of the 'Media ID' column, converted to strings
unique_media = df['Media ID'].unique()
media_ids = unique_media.astype(str).tolist()

In [29]:
curated_path = os.path.abspath("../data/curated")

# Every media id must have a matching <media_id>.zip in the curated folder;
# stop at the first one that is missing
for media_id in media_ids:
    zip_path = os.path.join(curated_path, f"{media_id}.zip")
    if os.path.exists(zip_path):
        continue
    raise Exception(f"ERROR: Could not find zip file for media {media_id} at {zip_path}.")

In [30]:
# Extracted media goes under <output_dir>/media, so it always follows the
# reduced-season folder configured earlier in the notebook
temp_path = os.path.join(output_dir, "media")
os.makedirs(temp_path, exist_ok=True)

# Media ids whose archive failed to unpack or is missing expected contents
problem_media = []

for media_id in tqdm(media_ids, desc="Unzipping media files"):
    # Source archive and per-media destination folder
    zip_path = os.path.join(curated_path, f"{media_id}.zip")
    dst_path = os.path.join(temp_path, media_id)

    # A missing archive is fatal
    if not os.path.exists(zip_path):
        raise Exception(f"ERROR: Could not find zip file for media {media_id} at {zip_path}.")

    # An existing destination is treated as already extracted.
    # tqdm.write is used instead of print so messages do not break up the progress bar.
    if os.path.exists(dst_path):
        tqdm.write(f"NOTE: Directory already exists for {media_id}, skipping unzip")
        continue

    # Create destination directory
    os.makedirs(dst_path, exist_ok=True)

    try:
        # Extract directly to the media_id subfolder
        tqdm.write(f"NOTE: Unzipping {zip_path} to {dst_path}")
        shutil.unpack_archive(zip_path, dst_path, 'zip')

        # If the archive wrapped its contents in a folder named after the media id,
        # move those contents up one level into dst_path
        nested_dir = os.path.join(dst_path, media_id)
        if os.path.isdir(nested_dir):
            for item in os.listdir(nested_dir):
                shutil.move(os.path.join(nested_dir, item), dst_path)
            os.rmdir(nested_dir)  # Remove the now-empty nested directory
    except Exception as e:
        tqdm.write(f"WARNING: Issue with unpacking {media_id}: {str(e)}")
        # Remove the partial extraction; otherwise the "already exists" check above
        # would skip this media on the next run and hide the failure
        shutil.rmtree(dst_path, ignore_errors=True)
        problem_media.append(media_id)
        continue

    # Each extracted media is expected to contain a frames folder and a frames.csv
    frames_dir = os.path.join(dst_path, "frames")
    frames_csv = os.path.join(dst_path, "frames.csv")

    if not os.path.exists(frames_dir):
        tqdm.write(f"ERROR: Could not find frames directory for media {media_id} at {frames_dir}.")
        problem_media.append(media_id)

    if not os.path.exists(frames_csv):
        tqdm.write(f"ERROR: Could not find frames.csv for media {media_id} at {frames_csv}.")
        if media_id not in problem_media:
            problem_media.append(media_id)

# One-line summary so problems are not lost in the per-media log
if problem_media:
    print(f"WARNING: {len(problem_media)} media had issues: {problem_media}")


Unzipping media files:   0%|          | 0/24 [00:00<?, ?it/s]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4306979.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4306979


Unzipping media files:   4%|▍         | 1/24 [00:04<01:48,  4.71s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4351615.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4351615


Unzipping media files:   8%|▊         | 2/24 [00:06<01:02,  2.84s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4363043.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4363043


Unzipping media files:  12%|█▎        | 3/24 [00:07<00:46,  2.20s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4377232.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4377232


Unzipping media files:  17%|█▋        | 4/24 [00:09<00:41,  2.07s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4358705.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4358705


Unzipping media files:  21%|██        | 5/24 [00:17<01:22,  4.35s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4356486.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4356486


Unzipping media files:  25%|██▌       | 6/24 [00:26<01:43,  5.72s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4352385.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4352385


Unzipping media files:  29%|██▉       | 7/24 [00:27<01:13,  4.30s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4355716.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4355716


Unzipping media files:  33%|███▎      | 8/24 [00:32<01:09,  4.32s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4378753.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4378753


Unzipping media files:  38%|███▊      | 9/24 [00:35<00:58,  3.92s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4353685.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4353685


Unzipping media files:  42%|████▏     | 10/24 [00:37<00:47,  3.38s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4356011.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4356011


Unzipping media files:  46%|████▌     | 11/24 [00:41<00:48,  3.75s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4364682.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4364682


Unzipping media files:  50%|█████     | 12/24 [00:45<00:45,  3.75s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4376903.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4376903


Unzipping media files:  54%|█████▍    | 13/24 [00:46<00:30,  2.78s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4355499.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4355499


Unzipping media files:  58%|█████▊    | 14/24 [00:49<00:28,  2.88s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4361202.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4361202


Unzipping media files:  62%|██████▎   | 15/24 [00:49<00:20,  2.22s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4363870.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4363870


Unzipping media files:  67%|██████▋   | 16/24 [00:51<00:17,  2.16s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4350057.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4350057


Unzipping media files:  71%|███████   | 17/24 [00:53<00:14,  2.05s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4375540.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4375540


Unzipping media files:  75%|███████▌  | 18/24 [00:55<00:11,  1.90s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4362355.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4362355


Unzipping media files:  79%|███████▉  | 19/24 [00:55<00:07,  1.52s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4356345.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4356345


Unzipping media files:  83%|████████▎ | 20/24 [00:56<00:05,  1.26s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4346978.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4346978


Unzipping media files:  88%|████████▊ | 21/24 [00:57<00:03,  1.21s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4291551.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4291551


Unzipping media files:  92%|█████████▏| 22/24 [00:59<00:02,  1.28s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4287966.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4287966


Unzipping media files:  96%|█████████▌| 23/24 [01:03<00:02,  2.11s/it]

NOTE: Unzipping e:\JordanP\Click-a-Coral\data\curated\4292439.zip to e:\JordanP\Click-a-Coral\data\reduced\Season_2\media\4292439


Unzipping media files: 100%|██████████| 24/24 [01:09<00:00,  2.90s/it]
