In [5]:
import os
import re
import glob
import time
import tqdm
import json
import random
import logging

import requests
from io import BytesIO

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image

In [6]:
def get_expiration(url):
    """
    Calculates the time remaining before a URL expires, based on its "Expires" timestamp.

    Args:
    url (str): The URL to check.

    Returns:
    int: The time remaining before expiration, in seconds. Returns 0 if no "Expires" timestamp is found in the URL.
    """
    
    time_remaining = 0
    
    # Extract expiration timestamp from URL
    match = re.search(r"Expires=(\d+)", url)
    
    if match:
        expiration = int(match.group(1))

        # Calculate time remaining before expiration
        now = int(time.time())
        time_remaining = expiration - now

    return time_remaining


def get_token():
    """
    Retrieves a CoralNet authentication token for API requests.
    
    Returns:
        tuple: A tuple containing the CoralNet token and request headers for authenticated requests.
    
    Raises:
        ValueError: If authentication fails.
    """
    
    # Requirements for authentication
    CORALNET_AUTH = CORALNET_URL + "api/token_auth/"
    HEADERS = {"Content-type" : "application/vnd.api+json"}
    PAYLOAD =  {"username": USERNAME, "password": PASSWORD}

    # Response from CoralNet when provided credentials
    response = requests.post(CORALNET_AUTH, 
                             data=json.dumps(PAYLOAD), 
                             headers=HEADERS)

    if response.ok:

        print("NOTE: Successful authentication")

        # Get the coralnet token returned to the user
        CORALNET_TOKEN = json.loads(response.content.decode())['token']

        # Update the header to contain the user's coralnet token
        HEADERS = {"Authorization": f"Token {CORALNET_TOKEN}", 
                   "Content-type": "application/vnd.api+json"}

    else:
        raise ValueError(f"ERROR: Could not authenticate\n{response.content}")
        
    return CORALNET_TOKEN, HEADERS


def sample_coordinates(width, height, num_samples=200, method='stratified'):
    """
    Generates a set of sample coordinates within a given image size.
    
    Parameters:
    ----------
    width : int
        The width of the image.
    height : int
        The height of the image.
    num_samples : int, optional
        The number of samples to generate. Default is 200.
    method : str, optional
        The method to use for generating samples. Valid values are:
        - 'uniform': generates samples using uniform sampling
        - 'random': generates samples using random sampling
        - 'stratified': generates samples using stratified sampling (default)
    
    Returns:
    -------
    tuple
        A tuple containing three elements:
        - A numpy array of x-coordinates of the generated samples.
        - A numpy array of y-coordinates of the generated samples.
        - A list of dictionaries containing row and column coordinates of the generated samples.
    """
    x_coordinates = []
    y_coordinates = []
    samples = []
    
    if method == 'uniform':
        x_coords = np.linspace(0, width-1, int(np.sqrt(num_samples)))
        y_coords = np.linspace(0, height-1, int(np.sqrt(num_samples)))
        for x in x_coords:
            for y in y_coords:
                x_coordinates.append(int(x))
                y_coordinates.append(int(y))
                samples.append({'row': int(y), 'column': int(x)})
                
    elif method == 'random':
        for i in range(num_samples):
            x = random.randint(0, width-1)
            y = random.randint(0, height-1)
            x_coordinates.append(x)
            y_coordinates.append(y)
            samples.append({'row': y, 'column': x})
            
    elif method == 'stratified':
        n = int(np.sqrt(num_samples))
        x_range = np.linspace(0, width-1, n+1)
        y_range = np.linspace(0, height-1, n+1)
        for i in range(n):
            for j in range(n):
                x = np.random.uniform(x_range[i], x_range[i+1])
                y = np.random.uniform(y_range[j], y_range[j+1])
                x_coordinates.append(int(x))
                y_coordinates.append(int(y))
                samples.append({'row': int(y), 'column': int(x)})
    
    return np.array(x_coordinates), np.array(y_coordinates), samples


def check_job_status(response):
    """
    Sends a request to retrieve the completed annotations and returns the status update.
    
    Parameters:
    ----------
    response : requests.Response
        A Response object returned from a previous request to CoralNet API.
    
    Returns:
    -------
    dict
        A dictionary containing status information, which includes the following keys:
        - 'status': a string indicating the current status of the job, such as "in progress" or "completed"
        - 'message': a string providing additional details about the job status, if available
    """
    
    # Sends a request to retrieve the completed annotations, obtains status update
    status = requests.get(url=f"https://coralnet.ucsd.edu{response.headers['Location']}", 
                      headers={"Authorization": f"Token {CORALNET_TOKEN}"})
    
    current_status = json.loads(status.content) 
    message = None

    if status.status_code == 200:
        
        # Still in progress
        if 'status' in current_status['data'][0]['attributes'].keys(): 

            s = current_status['data'][0]['attributes']['successes'] 
            f = current_status['data'][0]['attributes']['failures'] 
            t = current_status['data'][0]['attributes']['total']
            status_str = current_status['data'][0]['attributes']['status'] 
            ids = current_status['data'][0]['id'].split(",")
            ids = ''.join(str(_) for _ in ids)

            now = time.strftime("%H:%M:%S")

            message = f"Success: {s} Failures: {f} Total: {t} Status: {status_str} IDs: {ids} Time: {now}"

        else:
            # Completed
            message = ""
            
    else:
        # CoralNet is getting too many requests, sleep for a second.
        message = f"WARNING: {current_status['errors'][0]['detail']}; please wait..."
        match = re.search(r'\d+', message)
        wait = int(match.group()) if match else 15
        time.sleep(wait)

    return current_status, message 


def convert_to_csv(response, image_name, output_dir):
    """
    Converts response data into a Pandas DataFrame and concatenates each row into a single DataFrame.
    
    Parameters:
    ----------
    response : dict
        A dictionary object containing response data from a server.
    image_file : str
        The name of the image file corresponding to the response data.
    
    Returns:
    -------
    model_predictions : pandas.DataFrame
        A Pandas DataFrame containing prediction data, where each row represents a single point in the image.
        The columns of the DataFrame include 'image', 'X', 'Y', 'score_*', 'label_id_*', 'label_code_*', and 'label_name_*'.
        The asterisk (*) in the column names represents the index of the classification for each point, starting at 1.
    """
    
    model_predictions = pd.DataFrame()

    for point in response['data'][0]['attributes']['points']:

        per_point = dict()
        per_point['image'] = image_name
        per_point['X'] = point['column']
        per_point['Y'] = point['row']

        for index, classification in enumerate(point['classifications']):

            per_point['score_' + str(index + 1)] = classification['score']
            per_point['label_id_' + str(index + 1)] = classification['label_id']
            per_point['label_code_' + str(index + 1)] = classification['label_code']
            per_point['label_name_' + str(index + 1)] = classification['label_name']

        model_predictions = pd.concat([model_predictions, pd.DataFrame.from_dict([per_point])])
        
    basename = os.path.basename(image_name).split(".")[0]
    output_file = output_dir + basename + ".csv"
    model_predictions.reset_index(drop=True, inplace=True)
    model_predictions.to_csv(output_file, index=True)
    
    if os.path.exists(output_file):
        print(f"NOTE: Predictions for {basename} saved successfully")
    else:
        print(f"ERROR: Could not save predictions for {basename}")
    
    return model_predictions

In [7]:
# URL to CoralNet
CORALNET_URL = 'https://coralnet.ucsd.edu/'

# Desired source provided by user
SOURCE_ID = str(4006)

# URL of the source's image page
IMAGE_URL = CORALNET_URL + f"source/{SOURCE_ID}/browse/images/"

# The model id associated with the source
MODEL_ID = str(34895)

# URL to model of desired source
MODEL_URL = CORALNET_URL + f"api/classifier/{MODEL_ID}/deploy/"

# Username and password provided by user
USERNAME = os.getenv("CORALNET_USERNAME")
PASSWORD = os.getenv("CORALNET_PASSWORD")

# Set the data root
DATA_ROOT = "C://Users/jordan.pierce/Documents/GitHub/CoralNet_Tools/CoralNet_Data/3420/"
OUTPUT_PREDICTIONS = DATA_ROOT + "Predictions/"

# Create a folder to contain predictions
os.makedirs(OUTPUT_PREDICTIONS, exist_ok=True)

In [8]:
CORALNET_TOKEN, HEADERS = get_token()

NOTE: Successful authentication


In [15]:
IMAGES = [{"name": "mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg",
           "url": "https://coralnet-production.s3.amazonaws.com/media/images/m1lyashwj9.jpg?Signature=27N0j%2F%2FGLX%2Fq%2Flv7COl8SkM1ISo%3D&Expires=1683070406&AWSAccessKeyId=AKIAYVKEQ3B4DIOYONO3"}]

In [27]:
active_jobs = []
completed_jobs = []

# Looping through each image requested, sample points, upload to CoralNet for predictions
for image in IMAGES:
    
    # Keep adding jobs as long as there is less than 5 in the active queue
    while len(active_jobs) < 5:
    
        try:
            # Check to see if the image has expired
            if get_expiration(image['url']) <= 100:
                print(f"ERROR: {image['name']} has expired")
                IMAGES.remove(image)
                break

            # Download image
            response = requests.get(image['url'])

            # Read it to get the size
            img = Image.open(BytesIO(response.content))
            width, height = img.size

            # Sample points from image
            x, y, points = sample_coordinates(width, height, 200)

            # Update the IMAGE dict
            image['data'] = {"type": "image",
                             "attributes": 
                                 {
                                    "name": image['name'],
                                    "url" : image['url'], 
                                    "points": points
                                  },
                            }
            print(f"NOTE: Sampled points for {image['name']}")

            # Upload the image and the sampled points to CoralNet
            print(f"NOTE: Attempting to upload {image['name']}")

            # Sends the requests to the `source` and in exchange we receive 
            # a message telling if it was received correctly.
            response = requests.post(url=MODEL_URL, 
                                     data=json.dumps({'data': [image['data']]}, indent=4), 
                                     headers=HEADERS) 
            
            if response.ok:
                # If it was received, add to the current active jobs queue
                print(f"NOTE: Successfully uploaded {image['name']}")
                active_jobs.append([response, image['name']])
            else:
                print(f"ERROR: Failed to upload {image['name']}")
                break

        except:
            print(f"ERROR: Could not download {image['name']}; skipping.")
            
            
    # While there are still active jobs, loop through each and check
    # the current status. Once a job has been completed, output the results
    # and remove it from the queue. Exit the loop after all have finised.
    while active_jobs:
        # Current active job
        for (job, image_name) in active_jobs:
            # Check the status
            current_status, message = check_job_status(job)
            # Current job has finished, ouput the results, remove from queue
            if message == "":
                print(f"NOTE: Completed job for {image_name}")
                convert_to_csv(current_status, image_name, OUTPUT_PREDICTIONS)
                completed_jobs.append(current_status)
                active_jobs.remove([job, image_name])
            else:
                print(message, f" Active Jobs: {len(active_jobs)}")
        # Wait
        time.sleep(10)

        if len(active_jobs) < 5:
            print("NOTE: Adding more images to the queue")
            break

NOTE: Sampled points for mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Successfully uploaded mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Successfully uploaded mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Successfully uploaded mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Successfully uploaded mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Attempting to upload mcr_

Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21590 Time: 16:40:28  Active Jobs: 5
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21591 Time: 16:40:28  Active Jobs: 5
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21592 Time: 16:40:29  Active Jobs: 5
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21593 Time: 16:40:30  Active Jobs: 5
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21589 Time: 16:40:40  Active Jobs: 5
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21590 Time: 16:40:41  Active Jobs: 5
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21591 Time: 16:40:42  Active Jobs: 5
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21592 Time: 16:40:42  Active Jobs: 5
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21593 Time: 16:40:43  Active Jobs: 5
NOTE: Completed job for mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Predictions for mcr_lter1_fringingreef_pole4-5_qu2_20080415 saved successful

Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21601 Time: 16:43:43  Active Jobs: 5
Success: 0 Failures: 0 Total: 1 Status: Pending IDs: 21602 Time: 16:43:43  Active Jobs: 5
NOTE: Completed job for mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Predictions for mcr_lter1_fringingreef_pole4-5_qu2_20080415 saved successfully
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21600 Time: 16:43:56  Active Jobs: 4
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21601 Time: 16:43:56  Active Jobs: 4
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21602 Time: 16:43:57  Active Jobs: 4
NOTE: Adding more images to the queue
NOTE: Sampled points for mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
NOTE: Successfully uploaded mcr_lter1_fringingreef_pole4-5_qu2_20080415.jpg
Success: 0 Failures: 0 Total: 1 Status: In Progress IDs: 21599 Time: 16:44:09  Active Jobs: 5
Success: 0 Failures: 