In [None]:
import pandas as pd
import time
from erddapy import ERDDAP

In [None]:
# DOI of the SOCAT collection as a whole. The collection DOI info is not in
# ERDDAP (or could not be found there), so it is hard-coded here.
socatdoi = '10.25921/yg69-jd96'

# Accuracy value assigned to fCO2 for each cruise QC flag
# (presumably in uatm, like the fCO2 column itself -- TODO confirm).
flagaccuracy = {
    "A": 2.0,
    "B": 2.0,
    "C": 5.0,
    "D": 5.0,
    "E": 10.0,
}

In [None]:
# Query the SOCAT tabledap dataset on the PMEL ERDDAP server and build
# `tempdf`: one row per observation, restricted to the requested time/space
# box, with columns renamed to the project-wide names held in `vardict`.
e = ERDDAP(
    server='https://data.pmel.noaa.gov/socat/erddap',
    protocol='tabledap',
)

e.response = 'csv'
e.dataset_id = 'socat_v2021_fulldata'

# NOTE(review): mindate/maxdate, minlat/maxlat, minlon/maxlon and `vardict`
# are not defined in the cells visible here -- confirm they are set by an
# earlier cell so the notebook survives "Restart Kernel -> Run All".
e.constraints = {
    # Optional filters kept for reference:
    # 'dist_to_land>=': 10
    # 'region_id=': A,C,I,N,O,R,T,Z
    # 'expocode=':'74AB19900918',
    'time>=': mindate,
    'time<=': maxdate,
    'latitude>=': minlat,
    'latitude<=': maxlat,
    'longitude>=': minlon,
    'longitude<=': maxlon,
    'WOCE_CO2_water=': "2",  # synthesis file only has good data (keep questionable/bad?)
    # 'fCO2_water_sst_100humidity_uatm=~':"float('nan')" # Have yet to figure out how to set the nan filter
}
e.variables = [
    'expocode', 'time', 'latitude', 'longitude', 'depth', 'sal', 'temp',
    'fCO2_recommended', 'qc_flag', 'WOCE_CO2_water', 'socat_doi',
]

# The dtype keys are presumably positional column indices forwarded to the CSV
# reader; counted against e.variables they are 0 = expocode, 8 = qc_flag and
# 10 = socat_doi, which are read as strings.
tempdf = e.to_pandas(dtype={10: str, 8: str, 0: str})

# Retain only valid fco2 values (this filter could not yet be expressed as an
# erddapy constraint), then renumber the rows from 0.
tempdf = (
    tempdf
    .dropna(subset=['fCO2_recommended (uatm)'])
    .reset_index(drop=True)
)

# Rename columns to the project-wide names
tempdf = tempdf.rename(
    columns={
        'expocode': vardict['id'],
        'socat_doi': vardict['doi'],
        'latitude (degrees_north)': vardict['lat'],
        'longitude (degrees_east)': vardict['lon'],
        'depth (m)': vardict['dep'],
        'temp (degrees C)': vardict['temp'],
        'sal (PSU)': vardict['sal'],
        'fCO2_recommended (uatm)': vardict['fco2w'],
        'WOCE_CO2_water': vardict['fco2wf'],
        'qc_flag': 'Cruise_flag',
    }
)

# Create python date object. utc=True guarantees a UTC-aware column so the
# epoch subtraction below is always well defined.
tempdf['DATEVECTOR1'] = pd.to_datetime(tempdf['time (UTC)'], utc=True)

# Seconds since 1970-01-01T00:00:00Z. Dividing a timedelta by one second does
# not depend on the unit the datetimes are stored in, unlike casting to int64
# and dividing by 10**9, which is only right for nanosecond storage.
tempdf[vardict['unixd']] = (
    (tempdf['DATEVECTOR1'] - pd.Timestamp('1970-01-01', tz='UTC'))
    // pd.Timedelta(seconds=1)
)
tempdf[vardict['datevec']] = tempdf['DATEVECTOR1'].dt.strftime('%Y-%m-%dT%H:%M:%SZ')


In [None]:
# Assign accuracies following cruise flags.
# Each cruise flag is looked up in `flagaccuracy`; flags with no entry there
# (including missing flags) get 0.0. The column is assigned in one step rather
# than by writing into `tempdf[col].values`, because that array is not
# guaranteed to be a writable view of the frame (it is read-only under pandas
# Copy-on-Write), so such writes can fail or be lost.
tempdf[vardict['fco2wac']] = tempdf['Cruise_flag'].map(flagaccuracy).fillna(0.0)

# Flag fco2 as measured
#tempdf[vardict['fco2wc']]=0

# TODO: Estimate alkalinity from salinity, and then, estimate ph and dic

# Assign SOCAT DOI if Source DOI is missing
tempdf.loc[tempdf[vardict['doi']].isna(), vardict['doi']] = socatdoi

# Add source (SOCAT, GLODAP, ARGO, etc...)
# NOTE(review): `source` is not defined in the cells visible here -- confirm
# it is set by an earlier cell.
tempdf['SOURCE'] = source

# Rename and reset indices. `socatdf` is a second name for the same frame
# object as `tempdf` (no copy is made), so the in-place reset applies to both.
socatdf = tempdf
socatdf.reset_index(drop=True, inplace=True)

print('SOCAT frame size is ')
print(socatdf.shape)