In [51]:
import numpy as np
import pandas as pd
import pyworms
import requests
import json
import os
import pyworms
import math

In [52]:
def get_datasets(dashboard_url):
    """Return a list dashboard datasets from the API

    Args:
        dashboard_url (str): base url of an IFCB dashboard (V2)
    
    Returns:
        list: list of dataset names
    """
    request_rul = os.path.join(dashboard_url, 'api/filter_options')
    response=requests.get(request_rul)
    content=response.content
    content=json.loads(content)
    
    return content['dataset_options']

def get_bins_in_range(start_date, end_date, dataset_name, base_dashboard_url='https://ifcb.caloos.org'):
    """ Given a start date and end date, request all of the ifcb sampled from a given instrument feed

    Args:
        start_date (str): Start date string in the form of yyyy-mm-dd
        end_date (str): End date string in the form of yyyy-mm-dd
    Returns: 
        (pd.DataFrame): dataframe with a series of bin ids 
    """
    # Dates should be of the 
    url = f"{base_dashboard_url}/{dataset_name}/api/feed/temperature/start/{start_date}/end/{end_date}"
    response=requests.get(url)
    
    if response.status_code==200:
        content=response.content
        content=json.loads(content)
        content=pd.DataFrame.from_dict(content)
        content["pid"]=content["pid"].map(lambda x: x.lstrip(f"{base_dashboard_url}/{dataset_name}/"))
        content=content["pid"]
        return(content)
    
    else:
        print('Failed to get all bins with range with code: '+response.status_code)
        return(response.status_code)
    
    
def get_ifcb_metadata(bin, base_dashboard_url='https://ifcb.caloos.org'):    
    """ Return metadata for a given bin using the dashboard API (V2)
    Args:
        bin (str): bin id to get metadata. Bins are in the form of DYYYYMMDDTHHmmss_IFCBNNN (ex. D20230717T000942_IFCB104)
        base_dashboard_url (str): base url of an IFCB dashboard (V2)
    Returns:
        dict: Returns a dictionary of metadata for a given bin
    """
    request_url = f"{base_dashboard_url}/api/metadata/{bin}"
    1
    response=requests.get(request_url)
    
    if response.status_code==200:
        content=response.content
        content=json.loads(content)
    
    else:
        print("Metadata GET request failed with code: "+str(response.status_code))
        content = response.status_code
        
    return content

def get_bin_details(bin, base_dashboard_url='https://ifcb.caloos.org'):
    """ Return information about a bin using the dashboard API (V2)

    Args:
        bin (str): bin id to get metadata. Bins are in the form of DYYYYMMDDTHHmmss_IFCBNNN (ex. D20230717T000942_IFCB104)
        base_dashboard_url (str, optional): _description_. Defaults to 'https://ifcb.caloos.org'.

    Returns:
        _type_: _description_
    """
    url = f"{base_dashboard_url}/api/bin/{bin}"
    
    response=requests.get(url)
    
    if response.status_code==200:
        content = response.content
        content = json.loads(content)
        # newdict = {"bin_id":bin}
        # newdict.update(content)
        content
    
    else:
        print("Bin neighbors GET request failed with code: "+str(response.status_code))
        content = None
    return content

def bin_has_autoclass(bin, base_dashboard_url='https://ifcb.caloos.org'):
    """return boolean if a bin has autoclassification results

    Args:
        bin (str): bin id to get metadata. Bins are in the form of DYYYYMMDDTHHmmss_IFCBNNN (ex. D20230717T000942_IFCB104)
        base_dashboard_url (str, optional): base url of an IFCB dashboard (V2). Defaults to 'https://ifcb.caloos.org'.

    Returns:
        boolean: True if dahsboard has autoclassification results for bin, False otherwise
    """
   
    url = f"{base_dashboard_url}/api/has_products/{bin}"
    response=requests.get(url)
    
    if response.status_code==200:
        content=response.content
        content=json.loads(content)       
        class_score = content['has_class_scores']
    
    else:
        print('Autclass GET faile with code: '+str(response.status_code))
        class_score = None
        
    return class_score

def get_autoclass_data(bin,dataset_name, base_url='https://ifcb.caloos.org'):
    """Return autoclassification data for a given bin

    Args:
        bin (str): bin id to get metadata. Bins are in the form of DYYYYMMDDTHHmmss_IFCBNNN (ex. D20230717T000942_IFCB104)
        base_url (str, optional): base url of an IFCB dashboard (V2). Defaults to 'https://ifcb.caloos.org'.

    Returns:
        dict: dictionary of autoclassification data
    """
    try:
        url = f"{base_url}/{dataset_name}/{bin}_class_scores.csv"
        df = pd.read_csv(url)
        
    except Exception as e:
        print(f"Failed to get autoclassification data for bin {bin} with error: {e}")
        df = None
        
    return df

def get_feature_file(bin, dataset_name, base_url='https://ifcb.caloos.org'):
    """Return feature file for a given bin

    Args:
        bin (str): bin id to get metadata. Bins are in the form of DYYYYMMDDTHHmmss_IFCBNNN (ex. D20230717T000942_IFCB104)
        base_url (str, optional): base url of an IFCB dashboard (V2). Defaults to 'https://ifcb.caloos.org'.

    Returns:
        pd.DataFrame: feature file for a given bin
    """
    try:
        url = f"{base_url}/{dataset_name}/{bin}_features.csv"
        df = pd.read_csv(url)
        
    except Exception as e:
        print(f"Failed to get feature file for bin {bin} with error: {e}")
        df = None
        
    return df


In [53]:
base_url='https://ifcb.caloos.org/'

dataset_names = get_datasets(base_url)
scw_datset = dataset_names[12]
dataset_names

['bloofinz-io',
 'bodega-marine-lab',
 'calcofi-cruises-ctd',
 'calcofi-cruises-underway',
 'cal-poly-humboldt-hioc',
 'cce-lter-process-cruises-ifcb-151',
 'del-mar-mooring',
 'mbari-power-buoy',
 'newport-beach-pier',
 'plumes-and-blooms-cruises',
 'san-francisco-bay-cruises',
 'san-francisco-pier-17',
 'santa-cruz-municipal-wharf',
 'scripps-pier-ifcb-151',
 'scripps-pier-ifcb-158',
 'scripps-pier-ifcb-183',
 'stearns-wharf']

In [54]:
DATASET = scw_datset
start_date = "2023-07-17"
end_date = "2023-07-18"

range_response = get_bins_in_range(start_date=start_date, end_date=end_date, dataset_name=DATASET)
print(f"n files: {len(range_response)}\n{range_response.head()}")

n files: 58
0    D20230717T000942_IFCB104
1    D20230717T003329_IFCB104
2    D20230717T005715_IFCB104
3    D20230717T012101_IFCB104
4    D20230717T014447_IFCB104
Name: pid, dtype: object


Test run for all pids but mostly all the same.

In [56]:
lr=len(range_response)
ia=0
for n in np.arange(0,lr):
    if bin_has_autoclass(bin=range_response.iloc[n]):
        mtmp = get_ifcb_metadata(range_response.iloc[n])
        mtmp=pd.DataFrame.from_dict(mtmp)
        mtmp=mtmp.T
        ac=get_autoclass_data(bin=range_response.iloc[n],dataset_name=DATASET)
        if ia==0:
            metadatavals=mtmp
            auto_class=ac
            ia=1
        else:
            metadatavals=pd.concat([metadatavals,mtmp])
            autoclass=pd.concat([auto_class,ac])
#metadatavals['metadata'].keys()

In [None]:
#ia=0
#for n in np.arange(0,lr):
#    if bin_has_autoclass(bin=range_response.iloc[n]):
#        ac=get_autoclass_data(bin=range_response.iloc[n],dataset_name=DATASET)
#        if ia==0:
#            auto_class=ac
#            ia=1
#        else:
#            auto_class=pd.concat([auto_class,ac])

In [57]:
auto_class.shape

(2951, 52)

In [58]:
metadatavals.shape

(31, 129)

Not sure why shapes don't have one common count