In [1]:
### Replace lat/long/depth in deployment sheets with verified gold standard values from RCA sheet
### Script outputs all *UPDATED* deployment lines to a single file, sorted by reference designator

In [2]:
### import packages
import datetime
import os
from os import path

import pandas as pd


In [3]:
### setup directory pathways
# Remote inputs (raw GitHub content):
#   amRepo           - base URL that the '<array>_Deploy.csv' names are appended to
#   locationFilePath - base URL that 'INSTRUMENT_locations.csv' is appended to
amRepo = 'https://raw.githubusercontent.com/oceanobservatories/asset-management/master/deployment/'
locationFilePath = 'https://raw.githubusercontent.com/OOI-CabledArray/deployments/main/positions/'

# Local output: the csv that receives every corrected deployment line
# (path is relative to the working directory).
correctedFile = 'tmp/correctedDeployments_instruments.csv'


In [4]:
### load in lat/long/depths from csv sheet
# Every column is read as text and only the literal 'N/A' counts as missing;
# missing cells are then filled with the numeric sentinel -9999999.
InstFileName = locationFilePath + 'INSTRUMENT_locations.csv'
df_latlondepth_inst = pd.read_csv(
    InstFileName,
    dtype=str,
    keep_default_na=False,
    na_values=['N/A'],
).fillna(-9999999)

# Build {referenceDesignator: [position entry, ...]} in a single pass over the
# sheet. Keys appear in order of first occurrence and each list keeps the
# sheet's row order.
latlongdepth_inst_dict = {}
for j in df_latlondepth_inst.index:
    entry = {
        'deployYear': df_latlondepth_inst.at[j, 'deployYear'],
        'deployment': df_latlondepth_inst.at[j, 'deployment'],
        'lat': df_latlondepth_inst.at[j, 'latitude'],
        'lon': df_latlondepth_inst.at[j, 'longitude'],
        'deploymentDepth': df_latlondepth_inst.at[j, 'deploymentDepth'],
        'waterDepth': df_latlondepth_inst.at[j, 'waterDepth'],
    }
    refDesKey = df_latlondepth_inst.at[j, 'referenceDesignator']
    latlongdepth_inst_dict.setdefault(refDesKey, []).append(entry)


In [5]:
### Load in Cabled Array Deployment sheets from OOI asset-management github repo
# Array identifiers; each one names a '<array>_Deploy.csv' sheet under amRepo.
CabledArray = pd.Series([
    'CE02SHBP', 'CE04OSBP', 'CE04OSPD', 'CE04OSPS', 'RS01SBPD', 'RS01SBPS',
    'RS01SLBS', 'RS01SUM1', 'RS03AXBS', 'RS03AXPD', 'RS03AXPS', 'RS03INT2',
    'RS03INT1', 'RS01SUM2', 'RS03CCAL', 'RS03ECAL', 'RS03ASHS',
])

# One sheet URL per array, in the order listed above.
deploymentSheets = [amRepo + array + '_Deploy.csv' for array in CabledArray]

# Read every sheet (blank lines skipped, '#' starts a comment), stack them
# under a fresh 0..n-1 index, and fill missing cells with the -9999999 sentinel.
sheetFrames = [
    pd.read_csv(sheetUrl, skip_blank_lines=True, comment='#')
    for sheetUrl in deploymentSheets
]
df_deploy = pd.concat(sheetFrames, ignore_index=True).fillna(-9999999)

In [6]:
### Iterate through the deployment sheet dataframe by row;
### if lat/lon/deployment_depth/water_depth are different between
### the instrument position sheet and deployment sheet, replace
### deployment sheet values with those from the instrument sheet.
### Subdeployment code is in place to identify matching deployments when
### there are multiple deployments per year.
### Track which rows are not changed in order to drop those from the final
### dataframe before printing to csv.

# (deployment-sheet column, position-entry key, write via str()?)
# lat/lon are written exactly as held in the position entry; the two depth
# columns are written through str().
positionColumns = (
    ('lat', 'lat', False),
    ('lon', 'lon', False),
    ('deployment_depth', 'deploymentDepth', True),
    ('water_depth', 'waterDepth', True),
)


def _fieldContains(entry, key, text):
    """Return True when entry[key] is a string that contains text.

    Position cells that were missing in the sheet hold the int sentinel
    -9999999 rather than a string; `text in <int>` raises TypeError, so such
    cells are treated as "no match" instead of aborting the whole run.
    """
    value = entry[key]
    return isinstance(value, str) and text in value


def _applyPosition(frame, index, row, position):
    """Overwrite the position columns of one deployment row where they differ.

    Parameters
    ----------
    frame : DataFrame that receives the corrected values (written with .at).
    index : index label of the row being corrected.
    row : the row's current values, as yielded by frame.iterrows().
    position : position entry dict with keys 'lat', 'lon', 'deploymentDepth'
        and 'waterDepth'.

    Returns
    -------
    bool : True if at least one column was overwritten.
    """
    changed = False
    for column, key, asText in positionColumns:
        newValue = position[key]
        # Compare numerically: the sheet value is a number, the position
        # value is text (or the numeric sentinel).
        if row[column] != float(newValue):
            frame.at[index, column] = str(newValue) if asText else newValue
            changed = True
    return changed


# The replacement values are text. Holding the target columns as object lets
# them be written without upcasting a numeric column on assignment, which
# newer pandas releases deprecate and are slated to reject.
df_deploy = df_deploy.astype({spec[0]: object for spec in positionColumns})

# NOTE: despite its name, changeIndex collects the rows that were NOT changed;
# the export step drops them so only corrected lines are written.
changeIndex = []
for index, row in df_deploy.iterrows():
    refDes = row['Reference Designator']
    dt = datetime.datetime.strptime(row['startDateTime'], '%Y-%m-%dT%H:%M:%S')
    deployYear = str(dt.year)
    # NOTE(review): if deploymentNumber is held as float this yields e.g. '1.0',
    # which will not match a position entry of '1' -- confirm the column dtype.
    deployNumber = str(row['deploymentNumber'])

    # Candidate position entries: same reference designator, deploy year
    # contained in the entry's deployYear text.
    if refDes in latlongdepth_inst_dict:
        deployment = [x for x in latlongdepth_inst_dict[refDes]
                      if _fieldContains(x, 'deployYear', deployYear)]
    else:
        deployment = []
        print("error:  no position entries for " + refDes + " for " + deployYear)

    # Narrow the candidates down to exactly one entry, or report why not.
    position = None
    if len(deployment) == 1:
        position = deployment[0]
    elif len(deployment) > 1:
        # Several entries in the same year: disambiguate by deployment number.
        subDeployment = [x for x in deployment
                         if _fieldContains(x, 'deployment', deployNumber)]
        if len(subDeployment) == 0:
            print("error obtaining new position: no entries for year and deployment number for " + refDes + ':' + deployYear + ':' + deployNumber)
        elif len(subDeployment) > 1:
            print("error obtaining new position: multiple entries for year and deployment number for " + refDes + ':' + deployYear + ':' + deployNumber)
        else:
            position = subDeployment[0]
    else:
        print("error obtaining new position: no entries for " + refDes + ':' + deployYear)

    change = position is not None and _applyPosition(df_deploy, index, row, position)
    if not change:
        changeIndex.append(index)
        
   

error obtaining new position: no entries for CE02SHBP-LJ01D-05-ADCPTB104:2024
error obtaining new position: no entries for CE02SHBP-LJ01D-06-CTDBPN106:2024
error obtaining new position: no entries for CE02SHBP-LJ01D-06-DOSTAD106:2024
error obtaining new position: no entries for CE02SHBP-LJ01D-07-VEL3DC108:2024
error obtaining new position: no entries for CE02SHBP-LJ01D-08-OPTAAD106:2024
error obtaining new position: no entries for CE02SHBP-LJ01D-09-PCO2WB103:2024
error obtaining new position: no entries for CE02SHBP-LJ01D-10-PHSEND103:2024
error obtaining new position: no entries for CE02SHBP-LJ01D-11-HYDBBA106:2024
error obtaining new position: no entries for CE02SHBP-MJ01C-07-ZPLSCB101:2024
error obtaining new position: no entries for CE02SHBP-MJ01C-08-CAMDSB107:2024
error:  no position entries for CE04OSBP-LJ01C-11-HYDBBA110 for 2023
error obtaining new position: no entries for CE04OSBP-LJ01C-11-HYDBBA110:2023
error obtaining new position: no entries for CE04OSBP-LJ01C-05-ADCPSI103:

In [7]:
### Drop rows that are unchanged, format empty cells, and add 'N/A' for
### deployment_depth of profilers.
### Print to csv file in tmp directory;
### all changed deployment lines are exported to a single file.

# changeIndex holds the rows the correction loop left untouched, so dropping
# them keeps only the corrected deployment lines.
df_deploy = df_deploy.drop(changeIndex)

# Corrected depths were written as text; casting to int puts every value
# (including a '-9999999' string) on the same footing so the sentinel
# replacement below can match it.
df_deploy['deployment_depth']= df_deploy['deployment_depth'].astype(int)
df_deploy['deploymentNumber']= df_deploy['deploymentNumber'].astype(int)
df_deploy['versionNumber']= df_deploy['versionNumber'].astype(int)

# A sentinel deployment depth is written out as 'N/A'; every other sentinel
# cell is blanked.
df_deploy['deployment_depth'] = df_deploy['deployment_depth'].replace(-9999999,'N/A')
# NOTE(review): how replace() treats an explicit value=None has differed
# between pandas releases -- confirm the blanks come out as intended with the
# installed version.
df_deploy = df_deploy.replace(-9999999, None)

# to_csv does not create missing folders, so make sure the output directory
# exists; otherwise a fresh checkout without 'tmp/' fails here.
outputDir = path.dirname(correctedFile)
if outputDir:
    os.makedirs(outputDir, exist_ok=True)

df_deploy.to_csv(correctedFile, index=False)