In [None]:
import requests
from datetime import datetime, timedelta
from typing import List
import pytz
import os
from tqdm.auto import tqdm
import re
import os
import dateparser
from erclient import ERClient
from shapely import Point, MultiLineString
import pandas as pd
import geopandas as gpd
import dotenv
import concurrent.futures

dotenv.load_dotenv(".env", override=True)



In [None]:
class RmwSyncAnalyzer():
    """Compare gear deployments known to RMW Hub with those known to ER.

    The analyzer pulls gear sets from the RMW Hub search endpoints and
    subjects/observations from the ER service (through ``ERClient``), reduces
    both to sorted lists of cleaned trap ids, and offers helpers to find
    duplicates, mismatches and to "haul" (mark retrieved) gear on either side.
    """

    # Credentials and endpoints are read from the process environment once,
    # when the class body executes (the notebook calls dotenv.load_dotenv first).
    # Any variable that is not set resolves to None.
    BUOY_DEV_API_KEY = os.environ.get('BUOY_DEV_API_KEY')  # not referenced in the visible code
    BUOY_API_KEY = os.environ.get('BUOY_API_KEY')  # token passed to ERClient in __init__
    RMWHUB_API_KEY = os.environ.get('RMWHUB_API_KEY')  # sent as "api_key" in RMW Hub request bodies
    BUOY_API_URL = os.environ.get('BUOY_API_URL')  # not referenced in the visible code
    RMWHUB_API_SEARCH_HUB_URL = os.environ.get('RMWHUB_API_SEARCH_HUB_URL')  # search endpoint used for "other" sets
    RMWHUB_API_SEARCH_OWN_URL = os.environ.get('RMWHUB_API_SEARCH_OWN_URL')  # search endpoint used for "own" sets
    # Upload endpoint used by haul_rmw_gearset; falls back to the public default.
    RMWHUB_API_HAUL_URL = os.environ.get('RMWHUB_API_HAUL_URL', 'https://ropeless.network/api/upload_deployments/')

    def __init__(self, start_time: datetime):
        """Create an analyzer for data changed since ``start_time``.

        Args:
            start_time: Lower time bound for both the RMW Hub search and the
                ER subject query. May be a ``datetime`` or an ISO-8601 string.
                A ``datetime`` is converted with ``isoformat()`` because the
                value is embedded in a JSON request body, which cannot
                serialise ``datetime`` objects.
        """
        self.erclient = ERClient(service_root = "https://buoy.pamdas.org/api/v1.0", token = RmwSyncAnalyzer.BUOY_API_KEY)
        if isinstance(start_time, datetime):
            start_time = start_time.isoformat()
        self.start_time = start_time

        # Raw data as loaded from the two services.
        self.rmw_own_data = []
        self.rmw_all_data = {}
        self.er_subjects_by_name = {}

        # Derived lookup tables. They are (re)built by load_rmw_data() and
        # load_er_data(); initialising them here lets the query helpers be
        # called before loading without raising AttributeError.
        self.er_deployments = {}
        self.er_active_deployments = {}
        self.er_trap_locations = {}
        self.rmw_deployments = {}
        self.rmw_active_deployments = {}
        self.rmw_trap_locations = {}

    def load_rmw_data(self):
        """Fetch gear sets from RMW Hub and rebuild the RMW lookup tables.

        Queries both the hub-wide and the own-data search endpoints for sets
        changed since ``self.start_time``, indexes all returned sets by
        ``set_id`` in ``self.rmw_all_data`` and then regenerates the deployment
        and trap-location tables.

        Note: each set's ``when_updated_utc`` value is overwritten in place
        with the same instant rendered as a US/Pacific ISO-8601 string.

        Raises:
            requests.HTTPError: If either endpoint answers with an HTTP error
                status (previously this surfaced later as an obscure KeyError
                or JSON decoding error).
        """
        body = {
            "api_key": RmwSyncAnalyzer.RMWHUB_API_KEY,
            "format_version": 0.1,
            "start_datetime_utc": self.start_time,
            "max_sets": 10000
        }

        hub_response = requests.post(RmwSyncAnalyzer.RMWHUB_API_SEARCH_HUB_URL, json=body)
        hub_response.raise_for_status()
        rmw_other_data = hub_response.json()

        own_response = requests.post(RmwSyncAnalyzer.RMWHUB_API_SEARCH_OWN_URL, json=body)
        own_response.raise_for_status()
        self.rmw_own_data = own_response.json()

        # Own sets come last, so on a set_id collision the own copy wins.
        rmw_all_data_list = rmw_other_data['sets'] + self.rmw_own_data['sets']

        pacific = pytz.timezone('US/Pacific')  # loop-invariant, looked up once
        self.rmw_all_data = {}
        for gearset in rmw_all_data_list:
            gearset['when_updated_utc'] = dateparser.parse(gearset['when_updated_utc']).astimezone(pacific).isoformat()
            self.rmw_all_data[gearset['set_id']] = gearset

        self._generate_deployment_sets_for_rmw_data()
        self._generate_rmw_trap_locations()


    @staticmethod
    def clean_er_trap_id(trap_id):
        trap_id = re.sub(r"rmwhub_", "", trap_id)
        trap_id = re.sub(r"edgetech_", "", trap_id)
        trap_id = re.sub(r"device_", "", trap_id)
        return trap_id.lower()

    @staticmethod
    def clean_rmw_trap_id(trap_id):
        trap_id = re.sub("#*$", "", trap_id)
        trap_id = re.sub("^e_", "", trap_id)
        return trap_id.lower()

    @staticmethod
    def convert_gearset_to_list(gearset: dict):
        deployment = sorted(RmwSyncAnalyzer.clean_rmw_trap_id(trap['trap_id']) for trap in gearset['traps'])
        return deployment

    @staticmethod
    def convert_er_observation_to_list(obs: dict):
        devices = obs['observation_details']['devices']
        deployment = sorted(RmwSyncAnalyzer.clean_er_trap_id(device['device_id']) for device in devices)
        return deployment

    def find_rmw_deployment(self, traps: List):
        for gearset in self.rmw_all_data.values():
            dep = self.convert_gearset_to_list(gearset)
            if(dep == traps):
                return gearset
        return None

    def find_er_deployment_subjects(self, traps: List):
        matches = []
        for subject in self.er_subjects_by_name.values():
            dep = RmwSyncAnalyzer.convert_er_observation_to_list(subject['last_observation'])
            if(dep == traps):
                matches.append(subject)
        return matches
                
    def is_rmwset_active(gearset: dict):
        for trap in gearset['traps']:
            if(trap['status'] != 'deployed'):
                return False
                break
        return True

    def find_duplicate_deployments_in_rmw(self):
        deployments = {}
        dupes = {}
        for set_id, deployment in self.rmw_active_deployments.items():
            found = False
            for test_id, test_deployment in deployments.items():
                if(deployment == test_deployment):
                    dupes[set_id] = test_id
                    found = True
                    break
            if(not found):
                deployments[set_id] = deployment
        return dupes

    def find_duplicate_deployed_traps_in_rmw(rmwdata):
        traps = {}
        for gearset in rmwdata['sets']:
            for trap in gearset['traps']:
                if(trap['status'] == 'deployed'):
                    if(trap['trap_id'] in traps):
                        traps[trap['trap_id']].append(gearset['set_id'])
                    else:
                        traps[trap['trap_id']] = [gearset['set_id']]
        dupes = {}
        for trap, sets in traps.items():
            if(len(sets) > 1):
                dupes[trap] = sets
        return dupes

    def find_duplicate_deployed_traps_in_er(self):
        deployments = {}
        for subject_name in self.er_active_deployments.keys():
            subject = self.er_subjects_by_name[subject_name]
            devices = subject['last_observation']['observation_details']['devices']
            for device in devices:
                device_id = device['device_id']
                if(device_id in deployments):
                    found = False
                    for __, dep in deployments[device_id]:
                        if(dep == devices):
                            found = True
                    if(not found):
                        deployments[device_id].append((subject_name, devices))
                else:
                    deployments[device_id] = [(subject_name, devices)]
        dupes = {}
        for device_id, dev_deployments in deployments.items():
            if(len(dev_deployments) > 1):
                dupes[device_id] = dev_deployments
        return dupes
        
    def load_er_data(self):
        """Load subjects and latest observations from ER and rebuild the ER tables.

        The steps run in this order because each one consumes the state the
        previous one stored on ``self``.
        """
        # Fetch the subjects and index them in self.er_subjects_by_name.
        self._load_subjects_from_er()
        # Attach each subject's most recent observation as 'last_observation'.
        self._load_latest_observations()
        print(f"Loaded {len(self.er_subjects_by_name)} subjects and their last observations from ER")
        # Derive er_deployments / er_active_deployments from the observations.
        self._generate_deployment_sets_for_er_data()
        # Derive er_trap_locations for the active deployments.
        self._generate_er_trap_locations()

    def _load_subjects_from_er(self):
        """Fetch ER subjects updated since ``self.start_time`` and index them.

        Subjects are stored in ``self.er_subjects_by_name`` keyed by their
        cleaned name (see ``clean_er_trap_id``). When several subjects clean to
        the same name, the first one is kept and the rest are reported.
        Subjects whose cleaned name is empty are skipped.
        """
        subjects = self.erclient._get(path = "subjects", params = {
            "include_inactive": True,
            "include_details": True,
            "position_updated_since": self.start_time
        })
        self.er_subjects_by_name = {}
        duplicates = []
        for subject in subjects:
            clean = RmwSyncAnalyzer.clean_er_trap_id(subject['name'])
            if not clean:
                continue
            # The lookup must be against the index being built; testing the
            # raw API list (a list of dicts) could never match a string, so
            # duplicates went undetected and silently overwrote each other.
            if clean in self.er_subjects_by_name:
                duplicates.append(clean)
            else:
                self.er_subjects_by_name[clean] = subject

        print(f"{len(self.er_subjects_by_name)} subjects loaded from ER.")
        if duplicates:
            print(f"Skipped {len(duplicates)} ER subjects whose cleaned name was already loaded: {', '.join(sorted(set(duplicates)))}")

    def _get_latest_observation_for_subject(self, subject):
        """Attach the newest ER observation to ``subject`` and return it.

        Requests a single observation sorted by descending ``recorded_at`` and
        stores it under ``subject['last_observation']``. The subject's
        ``created_at`` is rewritten in place as a US/Pacific ISO-8601 string.
        Raises ``IndexError`` if the subject has no observations.
        """
        query = {
            "subject_id": subject['id'],
            "sort_by": "-recorded_at",
            "include_details": "true",
            "page_size": 1,
            "include_additional_data": True
        }
        response = self.erclient._get("observations", params = query)
        latest = list(response['results'])[0]

        pacific = pytz.timezone('US/Pacific')
        created = dateparser.parse(subject['created_at'])
        subject['created_at'] = created.astimezone(pacific).isoformat()
        subject['last_observation'] = latest

        return subject

    def _load_latest_observations(self):
        """Fetch the latest observation of every loaded subject concurrently.

        Uses a pool of 8 worker threads and a progress bar. Afterwards
        ``self.er_subjects_by_name`` is replaced by a dict keyed by each
        subject's raw ``name`` (not the cleaned name used while loading).
        """
        subjects_with_obs = {}
        with tqdm(total=len(self.er_subjects_by_name)) as pbar:
            with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
                futures = [
                    executor.submit(self._get_latest_observation_for_subject, subject)
                    for subject in self.er_subjects_by_name.values()
                ]
                for finished in concurrent.futures.as_completed(futures):
                    # .result() re-raises any exception from the worker.
                    enriched = finished.result()
                    subjects_with_obs[enriched['name']] = enriched
                    pbar.update(1)
        self.er_subjects_by_name = subjects_with_obs

    def _generate_deployment_sets_for_er_data(self):

        self.er_deployments = {}
        self.er_active_deployments = {}
        
        for subject in self.er_subjects_by_name.values():
            o = subject['last_observation']['observation_details']
            deployment = sorted(RmwSyncAnalyzer.clean_er_trap_id(device['device_id']) for device in o['devices'])
            self.er_deployments[subject["id"]] = deployment
        
            if(o['event_type'] in ['gear_deployed', 'smelts_buoy_deployment', 'smelts_buoy_subsea_data']):
                self.er_active_deployments[subject["name"]] = deployment
        print(f"ER: {len(self.er_deployments)} deployments, {len(self.er_active_deployments)} active.")


    def _generate_deployment_sets_for_rmw_data(self):
        rmw_own_deployments = {}
        for gearset in self.rmw_own_data['sets']:
            rmw_own_deployment = RmwSyncAnalyzer.convert_gearset_to_list(gearset)
            dupe = False
            for check_id, check_gearset in rmw_own_deployments.items():
                if(check_gearset == gearset):
                    dupe = True
            if(not dupe):
                rmw_own_deployments[gearset['set_id']] = rmw_own_deployment
        
        self.rmw_deployments = {}
        self.rmw_active_deployments = {}
        for gearset in self.rmw_all_data.values():
            rmw_deployment = RmwSyncAnalyzer.convert_gearset_to_list(gearset)
            self.rmw_deployments[gearset['set_id']] = rmw_deployment
            if(RmwSyncAnalyzer.is_rmwset_active(gearset)):            
                self.rmw_active_deployments[gearset['set_id']] = rmw_deployment
        print(f"RMW: {len(self.rmw_deployments)} deployments, {len(self.rmw_active_deployments)} active.")
        
    @staticmethod
    def map_unmatched_data(rmw_all_data, er_subjects_by_name, deployed_rmw_not_deployed_er, deployed_rmw_missing_er, deployed_er_not_deployed_rmw, deployed_er_missing_rmw):
        """Plot the four kinds of RMW/ER mismatch on an interactive map.

        Declared as a static method because the function takes its data as
        arguments instead of ``self``.

        Args:
            rmw_all_data: RMW gear sets keyed by ``set_id``.
            er_subjects_by_name: ER subjects (with ``last_observation``) keyed
                by the same names used in the ER mismatch lists.
            deployed_rmw_not_deployed_er: ``(set_id, deployment)`` pairs, drawn red.
            deployed_rmw_missing_er: ``(set_id, deployment)`` pairs, drawn blue.
            deployed_er_not_deployed_rmw: ``(subject_name, deployment)`` pairs,
                drawn yellow.
            deployed_er_missing_rmw: ``(subject_name, deployment)`` pairs,
                drawn green.

        Returns:
            The object returned by ``GeoDataFrame.explore`` so a notebook can
            display it (the result used to be discarded), or ``None`` when
            there is nothing to plot.
        """

        def build_shape(origin, id_key, identifier, deployment, coords, issue, color):
            # coords is a list of (latitude, longitude) pairs, one per trap/device.
            shape = {'origin': origin, id_key: identifier, 'deployment': deployment, 'lat': coords[0][0], 'lon': coords[0][1], 'issue': issue, 'color': color}
            # Geometries use (x, y) = (longitude, latitude) order.
            points = [[lon, lat] for lat, lon in coords]
            if len(points) == 1:
                shape["geometry"] = Point(points[0])
            else:
                shape["geometry"] = MultiLineString([points])
            return shape

        rmw_groups = (
            (deployed_rmw_not_deployed_er, 'Deployed in RMW, retrieved in ER', 'red'),
            (deployed_rmw_missing_er, 'Deployed in RMW, missing from ER', 'blue'),
        )
        er_groups = (
            (deployed_er_not_deployed_rmw, 'Deployed in ER, retrieved in RMW', 'yellow'),
            (deployed_er_missing_rmw, 'Deployed in ER, missing from RMW', 'green'),
        )

        shapes = []
        for entries, issue, color in rmw_groups:
            for set_id, deployment in entries:
                traps = rmw_all_data[set_id]['traps']
                coords = [(trap['latitude'], trap['longitude']) for trap in traps]
                shapes.append(build_shape('RMW', 'set_id', set_id, deployment, coords, issue, color))

        for entries, issue, color in er_groups:
            for subject_name, deployment in entries:
                devices = er_subjects_by_name[subject_name]['last_observation']['observation_details']['devices']
                coords = [(device['location']['latitude'], device['location']['longitude']) for device in devices]
                shapes.append(build_shape('ER', 'subject_id', subject_name, deployment, coords, issue, color))

        if not shapes:
            # An empty frame has no geometry column to build a GeoDataFrame from.
            return None

        df = pd.DataFrame.from_dict(shapes)
        gdf = gpd.GeoDataFrame(df, geometry = df.geometry, crs="epsg:4326")
        return gdf.explore(color=gdf['color'])

    def _generate_rmw_trap_locations(self):
        self.rmw_trap_locations = {}
        for gearset_id in self.rmw_active_deployments:
            gearset = self.rmw_all_data[gearset_id]
            for trap in gearset['traps']:
                self.rmw_trap_locations[RmwSyncAnalyzer.clean_rmw_trap_id(trap['trap_id'])] = (round(trap['latitude'], 5), round(trap['longitude'], 5))

    def _generate_er_trap_locations(self):
        self.er_trap_locations = {}
        for subject_name in self.er_active_deployments.keys():
            subject = self.er_subjects_by_name[subject_name]
            for device in subject['last_observation']['observation_details']['devices']:
                self.er_trap_locations[RmwSyncAnalyzer.clean_er_trap_id(device['device_id'])] = (round(device['location']['latitude'], 5), round(device['location']['longitude'], 5))
    
    def haul_rmw_gearset(self, gearset_id):
    
        gearset = self.rmw_all_data.get(gearset_id)
        if(not gearset):
            print(f"Gearset {gearset} not found.")
            return

        body = {
            "api_key": RmwSyncAnalyzer.RMWHUB_API_KEY,
            "format_version": 0.1,
            "sets": [{
                "vessel_id": gearset["vessel_id"],
                "set_id": gearset["set_id"],
                "deployment_type": "single" if len(gearset['traps']) == 1 else "trawl",
                "traps": []
            }]}

        for trap in gearset['traps']:
            trap["status"] = "retrieved"
            trap["retrieved_datetime_utc"] = datetime.now(tz=pytz.utc).isoformat()
            body["sets"][0]["traps"].append(trap)

        response = requests.post(RmwSyncAnalyzer.RMWHUB_API_HAUL_URL, json=body).json()
        return response
    
    def haul_er_deployment(self, traps: List[str]):
        """Hauls an ER trap based on a trap description.  For example, to haul the deployment consisting of edgetech_123_A and edgetech_123_B, you'd pass: ["123_A", "123_B"]

        Args:
            traps (List[str]): Description of trawl to haul.
        """

        for subject in self.find_er_deployment_subjects(traps):
            source_obs = subject['last_observation'].copy()
            now = datetime.now(tz = pytz.utc).isoformat()
            obs = {'additional': {},
                'recorded_at': now}
            for attr in ['location', 'source', 'observation_details']:
                obs[attr] = source_obs[attr] 

            obs['additional']['event_type'] = 'gear_retrieved'
            devices = []
            for device in obs['observation_details']['devices']:
                device['last_retrieved'] = now
                devices.append(device)
            obs['additional']['devices'] = devices

            print(f"Updating {subject['name']}")
            self.erclient.post_observation(obs)

In [None]:
# Analyse everything that changed during the last 24 hours. The bound is
# passed as an ISO-8601 string because it is embedded in JSON request bodies.
start_time = (datetime.now(tz=pytz.utc) - timedelta(days = 1)).isoformat()
analyzer = RmwSyncAnalyzer(start_time)
# Both loads perform network requests; the cell below reads tables from both.
analyzer.load_rmw_data()
analyzer.load_er_data()

In [None]:
# Duplicates inside each system.
dupes = analyzer.find_duplicate_deployments_in_rmw()
er_dupes = analyzer.find_duplicate_deployed_traps_in_er()

# Active in RMW: either ER knows the same trap list but it is not active
# there, or ER does not know it at all.
deployed_rmw_not_deployed_er = []
deployed_rmw_missing_er = []

for gearset_id, deployment in analyzer.rmw_active_deployments.items():
    if(deployment in analyzer.er_active_deployments.values()):
        continue
    if(deployment in analyzer.er_deployments.values()):
        deployed_rmw_not_deployed_er.append((gearset_id, deployment))
    else:
        # Sets last updated before 2025-01-01 UTC are not reported as missing.
        when = analyzer.rmw_all_data[gearset_id]['when_updated_utc']
        if(dateparser.parse(when) > datetime(year = 2025, month = 1, day = 1, tzinfo=pytz.utc)):
            deployed_rmw_missing_er.append((gearset_id, deployment))

# Active in ER: the mirror-image comparison against RMW.
deployed_er_not_deployed_rmw = []
deployed_er_missing_rmw = []
for subject_name, deployment in analyzer.er_active_deployments.items():
    # NOTE(review): deployment ids have already been through clean_er_trap_id,
    # which strips "rmwhub_", so this filter can only fire for ids that carried
    # the prefix in another letter case. Confirm whether the raw device id (or
    # the subject name) was meant to be tested here.
    # The emptiness guard avoids an IndexError for a subject without devices.
    if(deployment and "rmwhub_" in deployment[0]):
        continue
    if(deployment in analyzer.rmw_active_deployments.values()):
        continue
    if(deployment in analyzer.rmw_deployments.values()):
        deployed_er_not_deployed_rmw.append((subject_name, deployment))
    else:
        deployed_er_missing_rmw.append((subject_name, deployment))

# Traps active on both sides whose rounded positions differ, stored as
# (ER location, RMW location). One pass is enough: the reverse pass over the
# RMW table visited the same shared ids and wrote identical values.
different_locations = {}
for device_id, location in analyzer.er_trap_locations.items():
    if(device_id in analyzer.rmw_trap_locations):
        if(location != analyzer.rmw_trap_locations[device_id]):
            different_locations[device_id] = (location, analyzer.rmw_trap_locations[device_id])


# Section headers: the two "Duplicate deployments" headers used to lack the
# closing parenthesis after the count.
print(f"Duplicate deployments in ER ({len(er_dupes)})")
print("--------------------------------------------------------------------")
for k, v in er_dupes.items():
    subjects = [subj[0] for subj in v]
    print(f"{k} is in {len(subjects)} conflicting deployments: {', '.join(subjects)}")
print()


print(f"Duplicate deployments in RMW ({len(dupes)})")
print("--------------------------------------------------------------------")
for k, v in dupes.items():
    print(f"{k} is a dupe of {v}: {analyzer.rmw_active_deployments[k]}")
print()

# Disabled report section, kept for reference. It refers to module-level names
# (find_duplicate_deployed_traps_in_rmw, rmw_own_data) that this notebook does
# not define at module level.
#
# dupe_traps = find_duplicate_deployed_traps_in_rmw(rmw_own_data)
# print(f"Duplicate deployed traps in RMW ({len(dupe_traps)}) [Allowed by current business logic]")
# print("--------------------------------------------------------------------")
# if(dupe_traps):
#     for trap_id, sets in dupe_traps.items():
#         print(f"Trap {trap_id} deployed in sets: {sets}")
# print()

print(f"Deployed in RMW but not in ER ({len(deployed_rmw_not_deployed_er)})")
print("--------------------------------------------------------------------")
for missing in deployed_rmw_not_deployed_er:
    when = analyzer.rmw_all_data[missing[0]]['when_updated_utc']
    print(f"Set {missing[0]} {missing[1]} at {when}")
print()

print(f"Deployed in RMW but missing in ER ({len(deployed_rmw_missing_er)})")
print("--------------------------------------------------------------------")
for missing in deployed_rmw_missing_er:
    when = analyzer.rmw_all_data[missing[0]]['when_updated_utc']
    print(f"Set {missing[0]} {missing[1]} at {when}")
print()

# For the ER sections the subject is looked up directly by its name. The
# previous search by trap list rescanned every subject per row and, when two
# subjects shared a trap list, reported the first subject's timestamp for both.
print(f"Deployed in ER but not deployed in RMW ({len(deployed_er_not_deployed_rmw)})")
print("--------------------------------------------------------------------")
for missing in deployed_er_not_deployed_rmw:
    when = analyzer.er_subjects_by_name[missing[0]]['last_observation']['created_at']
    print(f"Subject {missing[0]} {missing[1]} at {when}")
print()

print(f"Deployed in ER but missing in RMW ({len(deployed_er_missing_rmw)})")
print("--------------------------------------------------------------------")
for missing in deployed_er_missing_rmw:
    when = analyzer.er_subjects_by_name[missing[0]]['last_observation']['created_at']
    print(f"Subject {missing[0]}: {missing[1]} at {when}")
print()

print(f"Deployed in ER and RMW but at different locations ({len(different_locations)})")
print("--------------------------------------------------------------------")
for device, locations in different_locations.items():
    print(f"{device}: {locations[0]} in ER vs. {locations[1]} in RMW")