In [None]:
import requests
import json
from datetime import datetime, timedelta
from typing import Dict, List, Tuple, Optional, Any
import pytz
import math
import logging
import os
from tqdm.auto import tqdm
from pathlib import Path
import sys
import re
import os
import dateparser
from erclient import ERClient
from shapely import Point, MultiLineString
import pandas as pd
import geopandas as gpd
import concurrent.futures

# Credentials and endpoints are read from the process environment so that no
# secret is stored in the notebook. Any variable that is not set resolves to None.

# EarthRanger (Buoy) API tokens and base URL.
BUOY_DEV_API_KEY = os.getenv('BUOY_DEV_API_KEY')
BUOY_API_KEY = os.getenv('BUOY_API_KEY')
BUOY_API_URL = os.getenv('BUOY_API_URL')

# rmwHub API key and its two search endpoints ("hub" and "own").
RMWHUB_API_KEY = os.getenv('RMWHUB_API_KEY')
RMWHUB_API_SEARCH_HUB_URL = os.getenv('RMWHUB_API_SEARCH_HUB_URL')
RMWHUB_API_SEARCH_OWN_URL = os.getenv('RMWHUB_API_SEARCH_OWN_URL')

In [None]:
def load_data_from_rmwhub(start_time, timeout = 60):
    """Fetch gear sets from both rmwHub search endpoints and index them by set id.

    Args:
        start_time: Value sent to the API as ``start_datetime_utc``.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A tuple ``(rmwdata_own_data, rmw_all_data)`` where ``rmwdata_own_data``
        is the decoded JSON payload of the "own" endpoint and ``rmw_all_data``
        maps ``set_id`` to the gearset dict across both endpoints. When a
        ``set_id`` occurs in both payloads the "own" entry wins because it is
        inserted last.

    Raises:
        requests.HTTPError: If either endpoint answers with an error status.
        requests.Timeout: If either endpoint does not answer within ``timeout``.
    """
    body = {
        "api_key": RMWHUB_API_KEY,
        "format_version": 0.1,
    #        "from_latitude": -90,
    #        "to_latitude": 90,
    #        "from_longitude": -180,
    #       "to_longitude": 180,
        "start_datetime_utc": start_time,
        "max_sets": 10000
    }

    def _search(url):
        # Without a timeout requests waits indefinitely, and without the status
        # check an error response only surfaces later as a KeyError on 'sets'.
        response = requests.post(url, json=body, timeout=timeout)
        response.raise_for_status()
        return response.json()

    rmwdata_other_data = _search(RMWHUB_API_SEARCH_HUB_URL)
    rmwdata_own_data = _search(RMWHUB_API_SEARCH_OWN_URL)
    rmw_all_data_list = rmwdata_other_data['sets'] + rmwdata_own_data['sets']

    rmw_all_data = {}
    for gearset in rmw_all_data_list:
        # The timestamp is rewritten in place as a US/Pacific ISO string. The
        # gearset dicts are shared with rmwdata_own_data, so that payload sees
        # the converted value too.
        gearset['when_updated_utc'] = dateparser.parse(gearset['when_updated_utc']).astimezone(pytz.timezone('US/Pacific')).isoformat()
        rmw_all_data[gearset['set_id']] = gearset

    print(len(rmw_all_data), " sets loaded from rmwHub.")

    return rmwdata_own_data, rmw_all_data

def clean_er_trap_id(trap_id):
    """Normalise an ER identifier so it can be compared with rmwHub ids.

    Every occurrence of ``rmwhub_``, ``edgetech_`` and ``device_`` is removed
    (in that order, case-sensitively, anywhere in the string) and the result
    is lower-cased.
    """
    cleaned = trap_id
    for token in (r"rmwhub_", r"edgetech_", r"device_"):
        cleaned = re.sub(token, "", cleaned)
    return cleaned.lower()

def clean_rmw_trap_id(trap_id):
    """Normalise an rmwHub trap id so it can be compared with ER ids.

    Trailing ``#`` characters are stripped first, then a leading ``e_`` is
    removed (case-sensitively), and the result is lower-cased.
    """
    without_trailing_hashes = re.sub("#*$", "", trap_id)
    return re.sub("^e_", "", without_trailing_hashes).lower()

def convert_gearset_to_list(gearset: dict):
    """Return the sorted, cleaned trap ids of every trap in ``gearset['traps']``."""
    trap_ids = [clean_rmw_trap_id(trap['trap_id']) for trap in gearset['traps']]
    trap_ids.sort()
    return trap_ids

def convert_er_observation_to_list(obs: dict):
    """Return the sorted, cleaned device ids listed in an ER observation.

    The ids are read from ``obs['observation_details']['devices']``.
    """
    device_ids = [
        clean_er_trap_id(device['device_id'])
        for device in obs['observation_details']['devices']
    ]
    device_ids.sort()
    return device_ids

def find_rmw_deployment(traps: List):
    """Return the first rmwHub gearset whose cleaned trap ids equal ``traps``.

    Searches the notebook-level ``rmw_all_data`` mapping (set id -> gearset).

    Args:
        traps: Sorted list of cleaned trap ids, as produced by
            ``convert_gearset_to_list``.

    Returns:
        The matching gearset dict, or ``None`` when nothing matches.
    """
    # rmw_all_data is a dict keyed by set id; iterating it directly would yield
    # the keys, so the gearsets must be taken from .values().
    for gearset in rmw_all_data.values():
        if convert_gearset_to_list(gearset) == traps:
            return gearset
    return None

def find_er_deployment(traps: List):
    """Return the first ER subject whose last observation lists exactly ``traps``.

    Searches the notebook-level ``er_subjects_by_name`` mapping and compares
    the cleaned, sorted device ids of each subject's ``last_observation``.
    Returns ``None`` when no subject matches.
    """
    matching_subjects = (
        subject
        for subject in er_subjects_by_name.values()
        if convert_er_observation_to_list(subject['last_observation']) == traps
    )
    return next(matching_subjects, None)
            
def is_rmwset_active(gearset: dict):
    """Report whether every trap in the gearset has status ``'deployed'``.

    Args:
        gearset: rmwHub gearset dict with a ``'traps'`` list; each trap must
            carry a ``'status'`` key.

    Returns:
        ``True`` when all traps are deployed, ``False`` as soon as one is not.
        A gearset with no traps counts as active.
    """
    return all(trap['status'] == 'deployed' for trap in gearset['traps'])

def load_subjects_from_er(start_time, print_duplicates = False):
    """Load ER subjects updated since ``start_time`` and index them by cleaned name.

    Args:
        start_time: Value passed to the API as ``position_updated_since``.
        print_duplicates: When true, also print the cleaned names that occurred
            more than once.

    Returns:
        Dict mapping the cleaned subject name (see ``clean_er_trap_id``) to the
        subject record. When several subjects clean to the same name the first
        one is kept and the later ones are only recorded as duplicates.
        Subjects whose cleaned name is empty are skipped.
    """
    erclient = ERClient(service_root = "https://buoy.pamdas.org/api/v1.0", token = BUOY_API_KEY)
    # NOTE(review): _get is a private ERClient method; kept because the call
    # needs these query parameters.
    subjects = erclient._get(path = "subjects", params = {
        "include_inactive": True,
        "include_details": True,
        "position_updated_since": start_time
    })
    er_subjects_by_name = {}
    duplicates = []
    for trap in subjects:
        clean = clean_er_trap_id(trap['name'])
        # Duplicates are detected against the names collected so far.
        if clean in er_subjects_by_name:
            duplicates.append(clean)
        elif clean and trap:
            er_subjects_by_name[clean] = trap

    if print_duplicates:
        print(f"{len(er_subjects_by_name)} traps loaded.")
        print("")
        print(f"Duplicate Subjects ({len(duplicates)})")
        print("---------------------------------------------------------------")
        for dupe in duplicates:
            print(dupe)

    print(f"{len(er_subjects_by_name)} subjects loaded from ER.")
    return er_subjects_by_name

def get_latest_observation_for_subject(subject, client = None):
    """Attach the most recent ER observation to ``subject`` and return it.

    The subject dict is modified in place: ``created_at`` is rewritten as a
    US/Pacific ISO string and ``last_observation`` is set to the newest
    observation (sorted by ``-recorded_at``).

    Args:
        subject: ER subject record with at least ``id`` and ``created_at``.
        client: Optional ERClient to use. When omitted, a notebook-level
            ``erclient`` is reused if one exists; otherwise a client is
            created once and stored under that name for later calls.

    Returns:
        The same ``subject`` dict, enriched.

    Raises:
        IndexError: If the API returns no observation for the subject.
    """
    if client is None:
        client = globals().get('erclient')
    if client is None:
        # No shared client exists at notebook level (the one built in
        # load_subjects_from_er is local to that function), so create one and
        # publish it for the other worker calls to reuse.
        client = ERClient(service_root = "https://buoy.pamdas.org/api/v1.0", token = BUOY_API_KEY)
        globals()['erclient'] = client

    results = list(client._get("observations", params = {
        "subject_id": subject['id'],
        "sort_by": "-recorded_at",
        "include_details": "true",
        "page_size": 1,
        "include_additional_data": True
    })['results'])
    if not results:
        # Same exception type as the bare [0] lookup, but it names the subject.
        raise IndexError(f"No observations returned for subject {subject['id']}")
    obs = results[0]

    subject['created_at'] = dateparser.parse(subject['created_at']).astimezone(pytz.timezone('US/Pacific')).isoformat()
    subject['last_observation'] = obs

    return subject

def add_latest_observations_to_subjects(er_subjects_by_name):
    """Fetch the latest observation for every subject using five worker threads.

    Each subject is passed to ``get_latest_observation_for_subject`` and a
    progress bar advances as the calls complete.

    Returns:
        Dict of the enriched subjects. NOTE: it is keyed by each subject's raw
        ``'name'`` field, not by the keys of the input mapping.
    """
    subjects_with_obs = {}
    with tqdm(total=len(er_subjects_by_name)) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            futures = [
                executor.submit(get_latest_observation_for_subject, subject)
                for subject in er_subjects_by_name.values()
            ]
            # Collect in completion order; an exception raised in a worker is
            # re-raised here by result().
            for future in concurrent.futures.as_completed(futures):
                enriched = future.result()
                subjects_with_obs[enriched['name']] = enriched
                pbar.update(1)

    return subjects_with_obs
                
def generate_deployment_sets_for_er_data(er_subjects_by_name):
    """Build deployment lists (sorted cleaned device ids) per ER subject id.

    Returns:
        ``(er_deployments, er_active_deployments)``. Both map ``subject['id']``
        to the device-id list of the subject's last observation. The second
        dict only holds subjects whose last observation has
        ``event_type == 'gear_deployed'``.
    """
    er_deployments = {}
    er_active_deployments = {}

    for subject in er_subjects_by_name.values():
        details = subject['last_observation']['observation_details']
        device_ids = [clean_er_trap_id(device['device_id']) for device in details['devices']]
        device_ids.sort()

        subject_id = subject["id"]
        er_deployments[subject_id] = device_ids

        if details['event_type'] != 'gear_deployed':
            continue
        er_active_deployments[subject_id] = device_ids

    return er_deployments, er_active_deployments

def generate_deployment_sets_for_rmw_data(rmw_all_data, rmw_own_data):
    """Build deployment lists (sorted cleaned trap ids) per rmwHub set id.

    Also reports "own" gear sets that list exactly the same traps as an
    earlier "own" set.

    Args:
        rmw_all_data: Dict mapping set id to gearset across both endpoints.
        rmw_own_data: Payload of the "own" endpoint; its ``'sets'`` list is
            scanned for duplicate deployments.

    Returns:
        ``(rmw_deployments, rmw_active_deployments)``. Both map ``set_id`` to
        the trap-id list. The second dict only holds sets for which
        ``is_rmwset_active`` is true.
    """
    rmw_own_deployments = {}
    # Read the parameter, not a notebook global of a similar name.
    for gearset in rmw_own_data['sets']:
        rmw_own_deployment = convert_gearset_to_list(gearset)
        dupe = False
        for check_id, check_deployment in rmw_own_deployments.items():
            # Compare trap-id list with trap-id list; comparing against the
            # gearset dict itself can never be equal.
            if check_deployment == rmw_own_deployment:
                dupe = True
                print("Duplicate deployment in RMW hub:", check_id, gearset['set_id'], gearset['when_updated_utc'], rmw_own_deployment)
                break
        if not dupe:
            rmw_own_deployments[gearset['set_id']] = rmw_own_deployment

    rmw_deployments = {}
    rmw_active_deployments = {}
    for gearset in rmw_all_data.values():
        rmw_deployment = convert_gearset_to_list(gearset)
        rmw_deployments[gearset['set_id']] = rmw_deployment
        if is_rmwset_active(gearset):
            rmw_active_deployments[gearset['set_id']] = rmw_deployment

    return rmw_deployments, rmw_active_deployments

def _unmatched_shape(origin, id_field, identifier, deployment, coordinates, issue, color):
    """Build one map record from a non-empty list of ``[longitude, latitude]`` pairs."""
    first_lon, first_lat = coordinates[0]
    shape = {'origin': origin, id_field: identifier, 'deployment': deployment, 'lat': first_lat, 'lon': first_lon, 'issue': issue, 'color': color}
    # A single position is drawn as a point, several positions as one line.
    if len(coordinates) == 1:
        shape["geometry"] = Point(coordinates[0])
    else:
        shape["geometry"] = MultiLineString([coordinates])
    return shape


def map_unmatched_data(rmw_all_data, er_subjects_by_name, deployed_rmw_not_deployed_er, deployed_rmw_missing_er, deployed_er_not_deployed_rmw, deployed_er_missing_rmw):
    """Plot the four groups of unmatched deployments on an interactive map.

    Args:
        rmw_all_data: Dict mapping rmwHub set id to gearset.
        er_subjects_by_name: Dict of ER subjects, each carrying ``last_observation``.
        deployed_rmw_not_deployed_er: ``(set_id, deployment)`` pairs, drawn red.
        deployed_rmw_missing_er: ``(set_id, deployment)`` pairs, drawn blue.
        deployed_er_not_deployed_rmw: ``(subject key, deployment)`` pairs, drawn yellow.
        deployed_er_missing_rmw: ``(subject key, deployment)`` pairs, drawn green.

    The ER subject key may be either a key of ``er_subjects_by_name`` or a
    subject ``id``; the deployment dictionaries built elsewhere in this
    notebook are keyed by subject id.

    Returns:
        The map object produced by ``GeoDataFrame.explore`` so the notebook can
        render it, or ``None`` when there is nothing to plot.

    Raises:
        KeyError: If a set id or subject key cannot be resolved.
    """
    er_subjects_by_id = {subject.get('id'): subject for subject in er_subjects_by_name.values()}

    def rmw_coordinates(set_id):
        return [[trap['longitude'], trap['latitude']] for trap in rmw_all_data[set_id]['traps']]

    def er_coordinates(subject_key):
        subject = er_subjects_by_name.get(subject_key) or er_subjects_by_id[subject_key]
        devices = subject['last_observation']['observation_details']['devices']
        return [[device['location']['longitude'], device['location']['latitude']] for device in devices]

    groups = [
        ('RMW', 'set_id', deployed_rmw_not_deployed_er, rmw_coordinates, 'Deployed in RMW, retrieved in ER', 'red'),
        ('RMW', 'set_id', deployed_rmw_missing_er, rmw_coordinates, 'Deployed in RMW, missing from ER', 'blue'),
        ('ER', 'subject_id', deployed_er_not_deployed_rmw, er_coordinates, 'Deployed in ER, retrieved in RMW', 'yellow'),
        ('ER', 'subject_id', deployed_er_missing_rmw, er_coordinates, 'Deployed in ER, missing from RMW', 'green'),
    ]

    shapes = []
    for origin, id_field, mismatches, coordinates_for, issue, color in groups:
        for identifier, deployment in mismatches:
            coordinates = coordinates_for(identifier)
            if not coordinates:
                # Nothing to draw for a deployment without any position.
                continue
            shapes.append(_unmatched_shape(origin, id_field, identifier, deployment, coordinates, issue, color))

    if not shapes:
        # An empty frame has no geometry column to build a GeoDataFrame from.
        return None

    df = pd.DataFrame.from_dict(shapes)
    gdf = gpd.GeoDataFrame(df, geometry = df.geometry, crs="epsg:4326")
    # Return the map: a bare call inside a function is never displayed.
    return gdf.explore(color=gdf['color'])

In [None]:
# Only records from the last 90 days are considered. The cutoff is an ISO-8601
# UTC timestamp that the ER query in a later cell uses as well.
lookback = timedelta(days = 90)
start_time = (datetime.now(tz=pytz.utc) - lookback).isoformat()
rmw_own_data, rmw_all_data = load_data_from_rmwhub(start_time = start_time)

In [None]:
# Load the ER subjects for the same time window, then enrich each one with its
# most recent observation (one API request per subject).
er_subjects_by_name = load_subjects_from_er(start_time = start_time, print_duplicates = False)
er_subjects_by_name = add_latest_observations_to_subjects(er_subjects_by_name = er_subjects_by_name)

In [None]:
# Reduce both data sources to {id: sorted trap-id list} dictionaries, once for
# every deployment and once for the active ones only, then print the counts.
er_deployments, er_active_deployments = generate_deployment_sets_for_er_data(
    er_subjects_by_name = er_subjects_by_name
)
rmw_deployments, rmw_active_deployments = generate_deployment_sets_for_rmw_data(
    rmw_all_data = rmw_all_data,
    rmw_own_data = rmw_own_data,
)
print(f"RMW: {len(rmw_deployments)} deployments, {len(rmw_active_deployments)} active.")
print(f"ER: {len(er_deployments)} deployments, {len(er_active_deployments)} active.")

In [None]:
def _deployment_keys(deployments):
    """Return the trap-id lists of a ``{id: [trap ids]}`` dict as a set of tuples."""
    return {tuple(deployment) for deployment in deployments.values()}


def _print_mismatches(title, label, mismatches, trailing_blank = True):
    """Print one report section: a header with the count, then one line per mismatch."""
    print(f"{title} ({len(mismatches)})")
    print("---------------------------------------------------------------")
    for identifier, deployment in mismatches:
        print(f"{label} {identifier}: {deployment}")
    if trailing_blank:
        print("\n")


# Hashable copies of the deployment lists, so each membership test is a set
# lookup instead of a scan over every deployment.
er_active_keys = _deployment_keys(er_active_deployments)
er_all_keys = _deployment_keys(er_deployments)
rmw_active_keys = _deployment_keys(rmw_active_deployments)
rmw_all_keys = _deployment_keys(rmw_deployments)

# rmwHub -> ER: active in rmwHub but not active in ER.
deployed_rmw_not_deployed_er = []
deployed_rmw_missing_er = []
for gearset_id, deployment in rmw_active_deployments.items():
    deployment_key = tuple(deployment)
    if deployment_key in er_active_keys:
        continue
    if deployment_key in er_all_keys:
        deployed_rmw_not_deployed_er.append((gearset_id, deployment))
    else:
        deployed_rmw_missing_er.append((gearset_id, deployment))

# The ER deployment dicts are keyed by subject id, so index the subjects by id
# to reach their raw device ids.
er_subjects_by_id = {subject['id']: subject for subject in er_subjects_by_name.values()}

# ER -> rmwHub: active in ER but not active in rmwHub.
deployed_er_not_deployed_rmw = []
deployed_er_missing_rmw = []
for subject_id, deployment in er_active_deployments.items():
    # Skip gear whose raw ER device ids carry the "rmwhub_" marker. The check
    # must use the raw ids: the cleaned deployment list has that marker
    # stripped already, so testing it there can never match.
    raw_devices = er_subjects_by_id[subject_id]['last_observation']['observation_details']['devices']
    if any("rmwhub_" in device['device_id'] for device in raw_devices):
        continue
    deployment_key = tuple(deployment)
    if deployment_key in rmw_active_keys:
        continue
    if deployment_key in rmw_all_keys:
        deployed_er_not_deployed_rmw.append((subject_id, deployment))
    else:
        deployed_er_missing_rmw.append((subject_id, deployment))

_print_mismatches("Deployed in RMW but not in ER", "Set", deployed_rmw_not_deployed_er)
_print_mismatches("Deployed in RMW but missing (w/in 3 months) in ER", "Set", deployed_rmw_missing_er)
_print_mismatches("Deployed in ER but not deployed in RMW", "Subject", deployed_er_not_deployed_rmw)
_print_mismatches("Deployed in ER but missing (w/in 3 months) in RMW", "Subject", deployed_er_missing_rmw, trailing_blank = False)



In [None]:
# For every set that is active in rmwHub but not active in ER, print both
# timestamps side by side. A leading "? " marks rows where the rmwHub update is
# older than the latest ER observation.
# NOTE(review): the comparison uses the observation's 'recorded_at' while the
# printed column is its 'created_at' - confirm this is intended.
# NOTE(review): assumes find_er_deployment always finds a match for these sets.
for set_id, deployment in deployed_rmw_not_deployed_er:
    gearset = rmw_all_data[set_id]
    dep_str = convert_gearset_to_list(gearset)
    er_dep = find_er_deployment(dep_str)
    rmw_updated_at = dateparser.parse(gearset['when_updated_utc'])
    er_recorded_at = dateparser.parse(er_dep['last_observation']['recorded_at'])
    marker = "? " if rmw_updated_at < er_recorded_at else ""
    print(marker, end='')
    print(set_id, dep_str, is_rmwset_active(gearset), gearset['when_updated_utc'], er_dep['last_observation']['created_at'])