In [1]:
### Replace lat/long/depth in deployment sheets with verified gold standard values from RCA sheet
### Script outputs all *UPDATED* deployment lines to a single file, sorted by reference designator

In [2]:
### import packages
import datetime
import os
from os import path

import pandas as pd


In [3]:
### Paths used by the notebook
# Local csv file that receives the corrected deployment rows.
correctedFile = 'tmp/correctedDeployments_nodes.csv'
# Base URL of the raw files in the OOI-CabledArray deployments repository;
# the sheet file names are appended to it when the sheets are loaded.
locationFilePath = (
    'https://raw.githubusercontent.com/'
    'OOI-CabledArray/deployments/main/'
)


In [4]:
### Read the lat/long/depth values from the positions csv sheet and group
### them by reference designator.
NodeFileName = locationFilePath + 'positions/NODE_locations.csv'
# Every column is read as text and only the literal 'N/A' counts as missing;
# missing cells are then replaced by the -9999999 placeholder.
df_latlondepth_node = pd.read_csv(
    NodeFileName,
    dtype=str,
    keep_default_na=False,
    na_values=['N/A'],
).fillna(-9999999)

# key used in each position entry -> column name in the positions sheet
positionColumns = {
    'deployYear': 'deployYear',
    'deployment': 'deployment',
    'lat': 'latitude',
    'lon': 'longitude',
    'deploymentDepth': 'deploymentDepth',
    'waterDepth': 'waterDepth',
}

# One list of position entries per reference designator, in sheet order.
latlondepth_node_dict = {}
for designator in df_latlondepth_node['referenceDesignator'].unique():
    isDesignator = df_latlondepth_node['referenceDesignator'] == designator
    entries = []
    for j in df_latlondepth_node[isDesignator].index:
        entries.append(
            {key: df_latlondepth_node[column][j] for key, column in positionColumns.items()}
        )
    latlondepth_node_dict[designator] = entries


In [5]:
### Read the Node deployment sheet from the OOI-CabledArray deployment github repo
NodeDeployFileName = locationFilePath + 'NODE_deployments.csv'
# Missing cells are replaced by the -9999999 placeholder right after loading.
df_deploy = pd.read_csv(
    NodeDeployFileName,
    skip_blank_lines=True,
).fillna(-9999999)

In [6]:
### Iterate through the deployment sheet dataframe by row;
### if lat/lon/deployment_depth/water_depth differ between the position
### sheet and the deployment sheet, replace the deployment sheet values
### with those from the position sheet.
### When several position entries match the deployment year, the
### deployment number is used to pick the matching sub-deployment.
### Rows that are not changed are tracked in changeIndex so they can be
### dropped from the final dataframe before printing to csv.

# (deployment sheet column, position entry key, label used in log messages)
_positionFields = [
    ('lat', 'lat', 'lat'),
    ('lon', 'lon', 'lon'),
    ('deployment_depth', 'deploymentDepth', 'deployment depth'),
    ('water_depth', 'waterDepth', 'water depth'),
]


def _applyPosition(df, index, row, position, refDes, deployYear, tag):
    """Overwrite the position columns of one deployment row where they differ.

    Parameters
    ----------
    df : DataFrame being corrected; cells are written through ``df.at``.
    index : index label of the row inside ``df``.
    row : the row as yielded by ``df.iterrows()``; used for the comparison.
    position : dict with the keys 'lat', 'lon', 'deploymentDepth' and
        'waterDepth'; each value must be accepted by ``float()``.
    refDes, deployYear : only used in the log messages.
    tag : prefix of the "changing" log line ('deploy' or 'subdeploy').

    Returns
    -------
    bool
        True when at least one column was overwritten.
    """
    changed = False
    for column, key, label in _positionFields:
        newValue = float(position[key])
        if row[column] != newValue:
            print('{} does not match for {} in {}!'.format(label, refDes, deployYear))
            print('{} changing {} to {}'.format(tag, str(row[column]), position[key]))
            # Store the parsed number, not the raw text: the column keeps
            # numeric values and a -9999999 placeholder remains a number
            # instead of turning into the string '-9999999'.
            df.at[index, column] = newValue
            changed = True
    return changed


changeIndex = []
for index, row in df_deploy.iterrows():
    change = False
    refDes = row['Reference Designator']
    dt = datetime.datetime.strptime(row['startDateTime'], '%Y-%m-%dT%H:%M:%S')
    deployYear = str(dt.year)
    deployNumber = row['deploymentNumber']
    # A column that read_csv parsed as float would yield '3.0' here, which
    # can never be found inside a position entry such as '3'.
    if isinstance(deployNumber, float) and deployNumber.is_integer():
        deployNumber = int(deployNumber)
    deployNumber = str(deployNumber)

    if refDes in latlondepth_node_dict:
        # `in` is a containment test on the position entry's deployYear value.
        deployment = [x for x in latlondepth_node_dict[refDes] if deployYear in x['deployYear']]
    else:
        deployment = []
        print("error:  no position entries for " + refDes + " for " + deployYear)

    if len(deployment) == 1:
        change = _applyPosition(df_deploy, index, row, deployment[0], refDes, deployYear, 'deploy')
    elif len(deployment) > 1:
        # Several entries for the same year: narrow down by deployment number.
        subDeployment = [x for x in deployment if deployNumber in x['deployment']]
        if len(subDeployment) == 0:
            print("error obtaining new position: no entries for year and deployment number for " + refDes + ':' + deployYear + ':' + deployNumber)
        elif len(subDeployment) > 1:
            print("error obtaining new position: multiple entries for year and deployment number for " + refDes + ':' + deployYear + ':' + deployNumber)
        else:
            change = _applyPosition(df_deploy, index, row, subDeployment[0], refDes, deployYear, 'subdeploy')
    else:
        print("error obtaining new position: no entries for " + refDes + ':' + deployYear)

    if not change:
        changeIndex.append(index)
        
   

lat does not match for CE02SHBP-LJ01D in 2023!
deploy changing 44.637213 to 44.637204
lon does not match for CE02SHBP-LJ01D in 2023!
deploy changing -124.305724 to -124.305715
lat does not match for CE04OSBP-LJ01C in 2023!
deploy changing 44.369259 to 44.369344
lon does not match for CE04OSBP-LJ01C in 2023!
deploy changing -124.953508 to -124.953542
lat does not match for CE04OSPD-DP01B in 2023!
deploy changing 44.368196 to 44.368226
lon does not match for CE04OSPD-DP01B in 2023!
deploy changing -124.952808 to -124.952856
water depth does not match for CE04OSPD-DP01B in 2023!
deploy changing 582 to 584
lat does not match for CE04OSPD-PD01B in 2023!
deploy changing 44.368196 to 44.368226
lon does not match for CE04OSPD-PD01B in 2023!
deploy changing -124.952808 to -124.952856
water depth does not match for CE04OSPD-PD01B in 2023!
deploy changing 582 to 584
lat does not match for CE04OSPS-PC01B in 2018!
deploy changing 44.374224 to 44.374222
lon does not match for CE04OSPS-PC01B in 2018!

In [7]:
### Drop rows that are unchanged, format empty cells, and add 'N/A' for
### deployment_depth of profilers.
### Print to csv file in tmp directory;
### all changed deployment lines are exported to a single file.
df_deploy = df_deploy.drop(changeIndex)
for column in ('deployment_depth', 'deploymentNumber', 'versionNumber'):
    df_deploy[column] = df_deploy[column].astype(int)
# -9999999 is the placeholder that replaced missing cells when the sheet was
# loaded: it becomes 'N/A' for deployment_depth and None everywhere else.
df_deploy['deployment_depth'] = df_deploy['deployment_depth'].replace(-9999999, 'N/A')
df_deploy = df_deploy.replace(-9999999, None)

# to_csv does not create missing folders, so make sure the output directory
# exists before writing (e.g. on a fresh checkout without tmp/).
outputDir = path.dirname(correctedFile)
if outputDir:
    os.makedirs(outputDir, exist_ok=True)
df_deploy.to_csv(correctedFile, index=False)