In [1]:
import os
import re
import glob
import time
import tqdm
import json
import random
import logging

import requests
from io import BytesIO

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image

from Download_CoralNet import *

In [19]:
def get_expiration(url):
    """
    Reports how many seconds are left before a URL expires.

    The expiry is read from the "Expires=<digits>" portion of the URL and
    compared against the current system clock.

    Args:
    url (str): The URL to inspect.

    Returns:
    int: Seconds until expiration (negative once the timestamp is in the past).
    Returns 0 if no "Expires" timestamp is found in the URL.
    """

    # Look for the expiration timestamp embedded in the URL
    found = re.search(r"Expires=(\d+)", url)

    # Nothing to measure against when the URL carries no timestamp
    if not found:
        return 0

    # Whole seconds between the expiry and the current time
    expires_at = int(found.group(1))
    return expires_at - int(time.time())


def get_token(username, password):
    """
    Exchanges CoralNet credentials for an API token.

    Posts the credentials as JSON to the token endpoint under CORALNET_URL and,
    on success, builds the headers to send with authenticated requests.

    Args:
        username (str): CoralNet account username.
        password (str): CoralNet account password.

    Returns:
        tuple: (token, headers) where headers is a dict holding the
        "Authorization" and "Content-type" entries.

    Raises:
        ValueError: If CoralNet does not answer with a successful response.
    """

    auth_url = CORALNET_URL + "/api/token_auth/"
    content_type = "application/vnd.api+json"

    # Hand the credentials to CoralNet
    response = requests.post(auth_url,
                             data=json.dumps({"username": username, "password": password}),
                             headers={"Content-type": content_type})

    # Bail out early when the credentials were rejected
    if not response.ok:
        raise ValueError(f"ERROR: Could not authenticate\n{response.content}")

    print("NOTE: Successful authentication")

    # Pull the token out of the JSON body
    token = json.loads(response.content.decode())['token']

    # Headers carrying the token for every later authenticated call
    auth_headers = {"Authorization": f"Token {token}",
                    "Content-type": content_type}

    return token, auth_headers


def _generate_sample_points(width, height, num_samples, method):
    """Returns lists of x and y pixel coordinates inside a width-by-height image for the given method."""

    x_coordinates = []
    y_coordinates = []

    if method == 'uniform':
        # Regular grid with int(sqrt(num_samples)) positions along each axis
        n = int(np.sqrt(num_samples))
        x_coords = np.linspace(0, width-1, n)
        y_coords = np.linspace(0, height-1, n)
        for x in x_coords:
            for y in y_coords:
                x_coordinates.append(int(x))
                y_coordinates.append(int(y))

    elif method == 'random':
        # Independent draws over the whole image
        for _ in range(num_samples):
            x_coordinates.append(random.randint(0, width-1))
            y_coordinates.append(random.randint(0, height-1))

    elif method == 'stratified':
        # One random point inside each cell of an n-by-n grid
        n = int(np.sqrt(num_samples))
        x_range = np.linspace(0, width-1, n+1)
        y_range = np.linspace(0, height-1, n+1)
        for i in range(n):
            for j in range(n):
                x = np.random.uniform(x_range[i], x_range[i+1])
                y = np.random.uniform(y_range[j], y_range[j+1])
                x_coordinates.append(int(x))
                y_coordinates.append(int(y))

    else:
        raise ValueError(f"ERROR: Unknown sampling method '{method}'; "
                         f"expected 'uniform', 'random' or 'stratified'")

    return x_coordinates, y_coordinates


def sample_points_for_url(url, num_samples=200, method='stratified'):
    """
    Downloads the image behind a URL and generates sample coordinates within it.

    Parameters:
    ----------
    url : str
        URL of the image. Its remaining lifetime is checked with
        get_expiration(); the URL is rejected when 500 seconds or less remain
        (which includes URLs without an "Expires" timestamp, reported as 0).
    num_samples : int, optional
        The number of samples to generate. Default is 200. The 'uniform' and
        'stratified' methods lay points out on a square grid, so they produce
        int(sqrt(num_samples)) ** 2 points (196 for the default of 200).
    method : str, optional
        The method to use for generating samples. Valid values are:
        - 'uniform': generates samples on a regular grid
        - 'random': generates samples using random sampling
        - 'stratified': generates one random sample per grid cell (default)

    Returns:
    -------
    tuple
        A tuple containing three elements:
        - A numpy array of x-coordinates of the generated samples.
        - A numpy array of y-coordinates of the generated samples.
        - A list of dictionaries containing row and column coordinates of the generated samples.

    Raises:
    ------
    ValueError
        If `method` is not one of the supported values.
    Exception
        If the URL is expiring soon, or if the image cannot be downloaded or
        opened (the underlying requests / PIL error is propagated).
    """

    # Reject an unsupported method before spending time on the download;
    # previously this silently produced zero points.
    if method not in ('uniform', 'random', 'stratified'):
        raise ValueError(f"ERROR: Unknown sampling method '{method}'; "
                         f"expected 'uniform', 'random' or 'stratified'")

    if get_expiration(url) <= 500:
        raise Exception(f"ERROR: URL is expiring soon; skipping.\n{url}")

    # Request the image; fail loudly on an HTTP error instead of handing an
    # error page to PIL, and never hang forever on a stalled connection.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # Only the dimensions are needed, so release the image right away
    with Image.open(BytesIO(response.content)) as img:
        width, height = img.size

    x_coordinates, y_coordinates = _generate_sample_points(width, height, num_samples, method)

    # Row/column dictionaries in the same order as the coordinate arrays
    samples = [{'row': y, 'column': x} for x, y in zip(x_coordinates, y_coordinates)]

    x = np.array(x_coordinates).astype(int)
    y = np.array(y_coordinates).astype(int)

    return x, y, samples


def check_job_status(response):
    """
    Polls CoralNet for the state of a previously submitted job.

    The status URL is built from the 'Location' header of `response`, and the
    request is authorised with the module-level CORALNET_TOKEN.

    Parameters:
    ----------
    response : requests.Response
        A Response object returned from a previous request to CoralNet API;
        its 'Location' header is used to find the job.

    Returns:
    -------
    tuple
        A tuple (current_status, message):
        - current_status : dict
            The decoded JSON body of the status response (an empty dict when an
            error response could not be decoded).
        - message : str
            "" when the job has completed, a progress summary while it is still
            in progress, or a "WARNING: ..." string when CoralNet answered with
            a non-200 status.

    Notes:
    -----
    On a non-200 answer this function sleeps before returning: for the number
    of seconds given by the first integer in the error detail, or 15 seconds
    when no such number is available.
    """

    # NOTE(review): the host is hard-coded here while other cells build URLs
    # from CORALNET_URL; confirm they are meant to be the same.
    status = requests.get(url=f"https://coralnet.ucsd.edu{response.headers['Location']}",
                          headers={"Authorization": f"Token {CORALNET_TOKEN}"},
                          timeout=60)

    if status.status_code == 200:

        current_status = json.loads(status.content)
        attributes = current_status['data'][0]['attributes']

        # Still in progress
        if 'status' in attributes:

            s = attributes['successes']
            f = attributes['failures']
            t = attributes['total']
            status_str = attributes['status']
            # Drop the commas from the id string
            ids = ''.join(current_status['data'][0]['id'].split(","))

            now = time.strftime("%H:%M:%S")

            message = f"Success: {s} Failures: {f} Total: {t} Status: {status_str} IDs: {ids} Time: {now}"

        else:
            # Completed
            message = ""

    else:
        # CoralNet refused the request (e.g. too many requests); back off.
        wait = 15

        # The error body is not guaranteed to be JSON
        try:
            current_status = json.loads(status.content)
        except ValueError:
            current_status = {}

        try:
            detail = current_status['errors'][0]['detail']
        except (KeyError, IndexError, TypeError):
            # No usable detail: keep the default wait rather than parsing a
            # number out of the HTTP status code.
            detail = f"Unexpected response (HTTP {status.status_code})"
        else:
            # Use the wait time CoralNet suggests, if it mentions one
            match = re.search(r'\d+', f"{detail}")
            if match:
                wait = int(match.group())

        message = f"WARNING: {detail}; please wait..."
        time.sleep(wait)

    return current_status, message


def convert_to_csv(response, image_name, output_dir):
    """
    Converts the points of a prediction response into a DataFrame and saves it as CSV.

    Parameters:
    ----------
    response : dict
        Decoded response data; the points are read from
        response['data'][0]['attributes']['points'].
    image_name : str
        The name of the image file corresponding to the response data. Its base
        name without the final extension names the CSV file.
    output_dir : str
        Directory the CSV file is written to (with or without a trailing
        separator). It must already exist.

    Returns:
    -------
    model_predictions : pandas.DataFrame
        A Pandas DataFrame containing prediction data, where each row represents a single point in the image.
        The columns of the DataFrame include 'image', 'X', 'Y', 'score_*', 'label_id_*', 'label_code_*', and 'label_name_*'.
        The asterisk (*) in the column names represents the index of the classification for each point, starting at 1.
    """

    # Collect one record per point and build the frame once, rather than
    # concatenating a new one-row frame on every iteration.
    records = []

    for point in response['data'][0]['attributes']['points']:

        per_point = dict()
        per_point['image'] = image_name
        per_point['X'] = point['column']
        per_point['Y'] = point['row']

        for index, classification in enumerate(point['classifications'], start=1):

            per_point['score_' + str(index)] = classification['score']
            per_point['label_id_' + str(index)] = classification['label_id']
            per_point['label_code_' + str(index)] = classification['label_code']
            per_point['label_name_' + str(index)] = classification['label_name']

        records.append(per_point)

    model_predictions = pd.DataFrame(records)

    # Strip only the final extension so names containing other dots
    # (e.g. "site.01.jpg") do not collapse onto the same CSV file.
    basename = os.path.splitext(os.path.basename(image_name))[0]
    # Join properly so output_dir works with or without a trailing separator
    output_file = os.path.join(output_dir, basename + ".csv")
    model_predictions.to_csv(output_file, index=True)

    if os.path.exists(output_file):
        print(f"NOTE: Predictions for {basename} saved successfully")
    else:
        print(f"ERROR: Could not save predictions for {basename}")

    return model_predictions

In [17]:
# Username and password provided by user, read from the environment so they
# are never stored in the notebook
USERNAME = os.getenv("CORALNET_USERNAME")
PASSWORD = os.getenv("CORALNET_PASSWORD")

# Fail early with a clear message instead of sending empty credentials to CoralNet
if not USERNAME or not PASSWORD:
    raise ValueError("ERROR: CORALNET_USERNAME and CORALNET_PASSWORD must be set in the environment")

# Verify that the username and password are valid
CORALNET_TOKEN, HEADERS = get_token(USERNAME, PASSWORD)

# Desired source provided by user
SOURCE_ID = str(4006)

# Set the data root; the last folder is the source id, so derive it from
# SOURCE_ID to keep the two from drifting apart.
# NOTE(review): this is an absolute path on one machine; adjust before running elsewhere.
DATA_ROOT = f"C://Users/jordan.pierce/Documents/GitHub/CoralNet_Tools/CoralNet_Data/{SOURCE_ID}/"
OUTPUT_PREDICTIONS = DATA_ROOT + "predictions/"

# Create a folder to contain predictions and points
os.makedirs(OUTPUT_PREDICTIONS, exist_ok=True)

NOTE: Successful authentication


In [51]:
# Variables for the model
metadata = get_model_meta(SOURCE_ID, USERNAME, PASSWORD)
MODEL_ID = metadata['Model_ID'][0]
MODEL_URL = CORALNET_URL + f"/api/classifier/{MODEL_ID}/deploy/"

# Images associated with the source
images = get_images(SOURCE_ID, USERNAME, PASSWORD)

Downloading Metadata...
Crawling for Images...


In [52]:
# Work on a small random subset of the images. The size is capped at the
# number of images available, because sampling more rows than exist without
# replacement raises a ValueError.
SAMPLE_SIZE = 7
images = images.sample(min(SAMPLE_SIZE, len(images)), replace=False)
images

Unnamed: 0,image_name,image_page,image_url
10,mcr_lter1_fringingreef_pole2-3_qu3_20080415.jpg,https://coralnet.ucsd.edu/image/3370053/view/,https://coralnet-production.s3.amazonaws.com:4...
0,mcr_lter1_fringingreef_pole1-2_qu1_20080415.jpg,https://coralnet.ucsd.edu/image/3370043/view/,https://coralnet-production.s3.amazonaws.com:4...
11,mcr_lter1_fringingreef_pole2-3_qu4_20080415.jpg,https://coralnet.ucsd.edu/image/3370054/view/,https://coralnet-production.s3.amazonaws.com:4...
19,mcr_lter1_fringingreef_pole3-4_qu4_20080415.jpg,https://coralnet.ucsd.edu/image/3370062/view/,https://coralnet-production.s3.amazonaws.com:4...
16,mcr_lter1_fringingreef_pole3-4_qu1_20080415.jpg,https://coralnet.ucsd.edu/image/3370059/view/,https://coralnet-production.s3.amazonaws.com:4...
6,mcr_lter1_fringingreef_pole1-2_qu7_20080415.jpg,https://coralnet.ucsd.edu/image/3370049/view/,https://coralnet-production.s3.amazonaws.com:4...
4,mcr_lter1_fringingreef_pole1-2_qu5_20080415.jpg,https://coralnet.ucsd.edu/image/3370047/view/,https://coralnet-production.s3.amazonaws.com:4...


In [45]:
# Maximum number of jobs kept in flight at once, and seconds between status polls
MAX_ACTIVE_JOBS = 5
POLL_INTERVAL = 10

active_jobs = []
completed_jobs = []

# Images still waiting to be uploaded, in their original order
pending_rows = [row for index, row in images.iterrows()]

# Keep going until every image has been uploaded AND every uploaded job has
# finished, so no image is dropped when the queue is full and no job is left
# unchecked once the last image has been submitted.
while pending_rows or active_jobs:

    # Keep adding jobs as long as there is room in the active queue
    while pending_rows and len(active_jobs) < MAX_ACTIVE_JOBS:

        row = pending_rows.pop(0)

        try:

            # Sample points from image
            x, y, points = sample_points_for_url(row['image_url'], 200)

            # Create a payload for the current image
            payload = {}
            payload['data'] = [{"type": "image",
                                "attributes":
                                 {
                                    "name": row['image_name'],
                                    "url" : row['image_url'],
                                    "points": points
                                  },
                              }]
            print(f"NOTE: Sampled points for {row['image_name']}")

            # Upload the image and the sampled points to CoralNet
            print(f"NOTE: Attempting to upload {row['image_name']}")

            # Sends the requests to the `source` and in exchange we receive
            # a message telling if it was received correctly.
            response = requests.post(url=MODEL_URL,
                                     data=json.dumps(payload, indent=4),
                                     headers=HEADERS)

        except Exception as e:
            print(f"ERROR: Could not download {row['image_name']}; skipping.\n{e}")
            continue

        if response.ok:
            # If it was received, add to the current active jobs queue
            print(f"NOTE: Successfully uploaded {row['image_name']}")
            active_jobs.append([response, row['image_name']])
        else:
            print(f"ERROR: Failed to upload {row['image_name']}")
            print(f"ERROR: {response.content}")
            # Stop submitting new images, but still collect the results of
            # the jobs that are already running.
            pending_rows.clear()

    # Check every active job once. Iterate over a snapshot because finished
    # jobs are removed from active_jobs inside the loop.
    for (job, image_name) in list(active_jobs):
        # Check the status
        current_status, message = check_job_status(job)
        # Current job has finished, output the results, remove from queue
        if message == "":
            print(f"NOTE: Completed job for {image_name}")
            convert_to_csv(current_status, image_name, OUTPUT_PREDICTIONS)
            completed_jobs.append(current_status)
            active_jobs.remove([job, image_name])
        else:
            print(message, f" Active Jobs: {len(active_jobs)}")

    # Wait before polling again; nothing to wait for once the queue is empty
    if active_jobs:
        time.sleep(POLL_INTERVAL)

NOTE: Sampled points for mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Successfully uploaded mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Successfully uploaded mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Successfully uploaded mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Successfully uploaded mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole2-3_qu1_20080415.jpg
NOTE: Attempting to upload mcr_

KeyboardInterrupt: 

In [53]:
# Maximum number of jobs kept in flight at once, and seconds between status polls
MAX_ACTIVE_JOBS = 5
POLL_INTERVAL = 10

active_jobs = []
completed_jobs = []
queued_jobs = []

# Images still waiting to be uploaded, in their original order
pending_rows = [row for index, row in images.iterrows()]

# Run until every image has been uploaded, every queued job has been started
# and every started job has finished. Previously jobs were only ever appended
# to queued_jobs and never moved to active_jobs, so nothing was ever polled.
while pending_rows or queued_jobs or active_jobs:

    # Upload images while queued plus active jobs leave room under the limit
    while pending_rows and len(active_jobs) + len(queued_jobs) < MAX_ACTIVE_JOBS:

        row = pending_rows.pop(0)

        try:
            # Sample points from image
            x, y, points = sample_points_for_url(row['image_url'], 200)

            # Create a payload for the current image
            payload = {}
            payload['data'] = [{"type": "image",
                                "attributes":
                                 {
                                    "name": row['image_name'],
                                    "url" : row['image_url'],
                                    "points": points
                                  },
                              }]
            print(f"NOTE: Sampled points for {row['image_name']}")

            # Upload the image and the sampled points to CoralNet
            print(f"NOTE: Attempting to upload {row['image_name']}")

            # Sends the requests to the `source` and in exchange we receive
            # a message telling if it was received correctly.
            response = requests.post(url=MODEL_URL,
                                     data=json.dumps(payload, indent=4),
                                     headers=HEADERS)

        except Exception as e:
            print(f"ERROR: Could not download {row['image_name']}; skipping.\n{e}")
            continue

        if response.ok:
            # If it was received, add to the queued jobs
            print(f"NOTE: Successfully queued {row['image_name']}")
            queued_jobs.append([response, row['image_name']])
        else:
            print(f"ERROR: Failed to upload {row['image_name']}")
            print(f"ERROR: {response.content}")
            # Stop submitting new images, but still collect the results of
            # the jobs that were already accepted.
            pending_rows.clear()

    # Move queued jobs into the active list while there is room
    while queued_jobs and len(active_jobs) < MAX_ACTIVE_JOBS:
        print("NOTE: Starting a queued job")
        started = queued_jobs.pop(0)
        active_jobs.append(started)
        print(f"NOTE: Started job for {started[1]}")

    # Check every active job once. Iterate over a snapshot because finished
    # jobs are removed from active_jobs inside the loop.
    for (job, image_name) in list(active_jobs):
        # Check the status
        current_status, message = check_job_status(job)
        # Current job has finished, output the results, remove from queue
        if message == "":
            print(f"NOTE: Completed job for {image_name}")
            convert_to_csv(current_status, image_name, OUTPUT_PREDICTIONS)
            completed_jobs.append(current_status)
            active_jobs.remove([job, image_name])
        else:
            print(message, f" Active Jobs: {len(active_jobs)}")

    # Wait before polling again; nothing to wait for once the queue is empty
    if active_jobs:
        time.sleep(POLL_INTERVAL)


NOTE: Sampled points for mcr_lter1_fringingreef_pole2-3_qu3_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole2-3_qu3_20080415.jpg
NOTE: Successfully queued mcr_lter1_fringingreef_pole2-3_qu3_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole1-2_qu1_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole1-2_qu1_20080415.jpg
NOTE: Successfully queued mcr_lter1_fringingreef_pole1-2_qu1_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole2-3_qu4_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole2-3_qu4_20080415.jpg
NOTE: Successfully queued mcr_lter1_fringingreef_pole2-3_qu4_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole3-4_qu4_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fringingreef_pole3-4_qu4_20080415.jpg
NOTE: Successfully queued mcr_lter1_fringingreef_pole3-4_qu4_20080415.jpg
NOTE: Sampled points for mcr_lter1_fringingreef_pole3-4_qu1_20080415.jpg
NOTE: Attempting to upload mcr_lter1_fr

In [55]:
# Manually re-check a single job and display the (status, message) tuple.
# NOTE(review): `job` is not defined in this cell; it is whatever value the
# job-monitoring cells above last bound to that name, so the result depends on
# which of those cells ran last. On a fresh kernel, run one of them first.
check_job_status(job)

({'data': [{'type': 'image',
    'id': 'https://coralnet-production.s3.amazonaws.com:443/media/images/z6jaxn2xoe.jpg?Signature=ynTUyc9mXDPWZHWyR0uBJMELZGA%3D&Expires=1683156344&AWSAccessKeyId=AKIAYVKEQ3B4DIOYONO3',
    'attributes': {'url': 'https://coralnet-production.s3.amazonaws.com:443/media/images/z6jaxn2xoe.jpg?Signature=ynTUyc9mXDPWZHWyR0uBJMELZGA%3D&Expires=1683156344&AWSAccessKeyId=AKIAYVKEQ3B4DIOYONO3',
     'points': [{'row': 92,
       'column': 128,
       'classifications': [{'score': 0.8708003052685906,
         'label_id': 85,
         'label_code': 'Off',
         'label_name': 'Off'},
        {'score': 0.060498272221681755,
         'label_id': 101,
         'label_code': 'CCA',
         'label_name': 'CCA (crustose coralline algae)'},
        {'score': 0.027582158378669806,
         'label_id': 74,
         'label_code': 'Porit',
         'label_name': 'Porites'},
        {'score': 0.023407960283227735,
         'label_id': 82,
         'label_code': 'Turf',
        