In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import glob, os
import matplotlib.pyplot as plt
import time
import scipy.stats as stats
import matplotlib.dates as mdates
import re
import csv
import warnings

### Set Path

In [3]:
# read in a user-created text file to point to local directories to avoid having to change this every time 
# we update code
lines=[]
with open('path_file.txt') as f:
    lines = f.readlines()
    
count = 0
for line in lines:
    count += 1
    index = line.find("=")
    #print(f'line {count}: {line}')
    #print(index)
    #print(line[0:index])
    line = line.rstrip()
    if line[0:index].find("argo")>=0:
        argo_path=line[index+1:]
    elif line[0:index].find("liar")>=0:
        liar_dir=line[index+1:]
    elif line[0:index].find("matlab")>=0:
        matlab_dir=line[index+1:]
        
# Set the paths
output_dir = 'output/'
data_dir = 'data/'

### Get list of float files

In [4]:
# get list of argo meta files
argolist = []
for file in os.listdir(argo_path):
    if file.endswith('meta.nc'):
        argolist.append(file)

print(len(argolist))

2172


### Create a table with all calibration comments ("cal_str_table")


In [5]:
cal_str_table = pd.DataFrame(columns=['wmo','o2_cal_comment', 'o2_cal_eq'])
# temp_list = ['3902471_meta.nc']
# loop through all, open meta files, read in calibration comments, store
for n, file in enumerate(argolist): #enumerate(temp_list): # 
    print(str(n) + ' ' + file)


    #also load meta file  for same float
    file_n = argo_path + file
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message="invalid value encountered in cast")
        meta_n = xr.open_dataset(argo_path + file)
    # meta_n
    parameters_n = meta_n.PARAMETER.values
    # print(parameters_n)

    contains_doxy = any(parameter.decode('utf-8').strip() == 'DOXY' for parameter in parameters_n)
    if contains_doxy is False:
        continue
        print('no doxy')

    # not sure if i need this, but loading the Sprof file as well

    wmo_n = int((re.search(r'\d+', file)).group())

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message="invalid value encountered in cast")
        try:
            sprof_n = xr.open_dataset(argo_path + str(wmo_n) + '_Sprof.nc')
        except:
            continue

    n_prof = sprof_n.dims['N_PROF']
    # gets order of sensors to extract calibration comments
    # some profiles might be missing sensor name (not sure why) so loop through looking
    o2_ind_all = np.full(n_prof, np.nan)
    # finds where o2 calibration comment is in each profile (at least for one float it changes from the first profile to the rest)
    for p in range(0, n_prof):
        cal_str = sprof_n.STATION_PARAMETERS.values.astype(str)[p]
        # print(cal_str)
        for i, param in enumerate(cal_str):
            if 'DOXY' in param:
                o2_ind_all[p] = i
                break
    
    # o2_ind = np.unique(o2_ind_all[~np.isnan(o2_ind_all)])
    # if len(o2_ind)>1:
    #     print('multiple o2 ind' + str(wmo_n))
    #     break
    o2_cal_full = []
    o2_eq_full = []

    for idx, o2_ind in enumerate(o2_ind_all):
        if ~np.isnan(o2_ind):
            o2_cal_full.append(sprof_n.SCIENTIFIC_CALIB_COMMENT.values[idx,-1,np.int32(o2_ind)])
            # pd.concat([o2_cal_full]+ sprof_n.SCIENTIFIC_CALIB_COMMENT.values[idx,-1,np.int32(o2_ind)], ignore_index=True)
            o2_eq_full.append(sprof_n.SCIENTIFIC_CALIB_EQUATION.values[idx,-1,np.int32(o2_ind)])

    data_comment_eq = pd.DataFrame({'o2_cal_full': o2_cal_full, 'o2_eq_full': o2_eq_full})

    # Drop duplicate rows to get unique pairs
    unique_pairs = data_comment_eq.drop_duplicates()
    # Create an empty list to store DataFrames
    data_frames = []
    # print(o2_cal_unique)
    # might have multiple unique comments, so save out each one:
    for i in unique_pairs.index:
        o2_cal_i = unique_pairs.o2_cal_full[i].decode("utf-8")
        o2_eq_i = unique_pairs.o2_eq_full[i].decode("utf-8")

        # print(o2_cal_i)
        # save with wmo only for now
        new_data_cal_info = pd.DataFrame({'wmo': [wmo_n],
                                        'o2_cal_comment': [o2_cal_i],
                                        'o2_cal_eq': [o2_eq_i]})
        # Append the new_data_cal_info DataFrame to the list
        data_frames.append(new_data_cal_info)

        # cal_str_table = cal_str_table.append(new_data_cal_info, ignore_index=True)
    # print(data_frames)
    # Concatenate all DataFrames in the list into a single DataFrame
    cal_str_table = pd.concat([cal_str_table] + data_frames, ignore_index=True)
    # print(cal_str_table)
    # if n==10:
    #     break
    # if wmo_n== 3902471:
    #     break
print(cal_str_table)

0 4901216_meta.nc
1 7901019_meta.nc
2 3902461_meta.nc
3 5904485_meta.nc
4 6903878_meta.nc
5 7901009_meta.nc
6 4902641_meta.nc
7 3902471_meta.nc
8 6900874_meta.nc
9 2900787_meta.nc
10 5906972_meta.nc
11 5905381_meta.nc
12 4903668_meta.nc
13 5906042_meta.nc
14 6902734_meta.nc
15 2903454_meta.nc
16 6902019_meta.nc
17 6902980_meta.nc
18 7900878_meta.nc
19 1901153_meta.nc
20 5901739_meta.nc
21 5906571_meta.nc
22 2903167_meta.nc
23 5906561_meta.nc
24 1902662_meta.nc
25 2902238_meta.nc
26 5905136_meta.nc
27 4900869_meta.nc
28 6903030_meta.nc
29 5901372_meta.nc
30 2900448_meta.nc
31 4900879_meta.nc
32 6903042_meta.nc
33 2900542_meta.nc
34 6902548_meta.nc
35 5902112_meta.nc
36 6901632_meta.nc
37 5901310_meta.nc
38 5906513_meta.nc
39 6901896_meta.nc
40 6990516_meta.nc
41 5906503_meta.nc
42 6901886_meta.nc
43 7900962_meta.nc
44 6903571_meta.nc
45 2900119_meta.nc
46 6990660_meta.nc
47 5906020_meta.nc
48 5905232_meta.nc
49 5906030_meta.nc
50 5903616_meta.nc
51 4903817_meta.nc
52 3902130_meta.nc
53 

In [31]:
# let's find float 5906767 and check it. It is currently categorized as an SBE63 with air cal.
# comment does read as: 'G determined from float  measurements in air. See Johnson et al.,2015,doi:10.1175/JTECH-D-15-0101.1 
file = '5906767_meta.nc'
file_n = argo_path + file
file_n
meta_n = xr.open_dataset(argo_path + file)
meta_n
parameters_n = meta_n.PARAMETER.values
parameters_n

wmo_n = int((re.search(r'\d+', file)).group())
sprof_n = xr.open_dataset(argo_path + str(wmo_n) + '_Sprof.nc')
sprof_n


In [34]:
n_prof = sprof_n.dims['N_PROF']
# gets order of sensors to extract calibration comments
# some profiles might be missing sensor name (not sure why) so loop through looking
o2_ind_all = np.full(n_prof, np.nan)
# finds where o2 calibration comment is in each profile (at least for one float it changes from the first profile to the rest)
for p in range(0, n_prof):
    cal_str = sprof_n.STATION_PARAMETERS.values.astype(str)[p]
    # print(cal_str)
    for i, param in enumerate(cal_str):
        if 'DOXY' in param:
            o2_ind_all[p] = i
            break
o2_ind_all
o2_cal_full = []
o2_eq_full = []

for idx, o2_ind in enumerate(o2_ind_all):
    if ~np.isnan(o2_ind):
        print(sprof_n.SCIENTIFIC_CALIB_COMMENT.values[idx,-1,np.int32(o2_ind)])
        # pd.concat([o2_cal_full]+ sprof_n.SCIENTIFIC_CALIB_COMMENT.values[idx,-1,np.int32(o2_ind)], ignore_index=True)
        print(sprof_n.SCIENTIFIC_CALIB_EQUATION.values[idx,-1,np.int32(o2_ind)])

b'G determined from float  measurements in air. See Johnson et al.,2015,doi:10.1175/JTECH-D-15-0101.1                                                                                                                                                             '
b'DOXY_ADJUSTED=DOXY*G; G = G_INIT + G_DRIFT*(JULD_PROF - JULD_INIT)/365                                                                                                                                                                                          '
b'G determined from float  measurements in air. See Johnson et al.,2015,doi:10.1175/JTECH-D-15-0101.1                                                                                                                                                             '
b'DOXY_ADJUSTED=DOXY*G; G = G_INIT + G_DRIFT*(JULD_PROF - JULD_INIT)/365                                                                                                                                                    

In [36]:
# Now find the sensor info 
meta_n['SENSOR_MODEL']

In [6]:
cal_str_table

Unnamed: 0,wmo,o2_cal_comment,o2_cal_eq
0,4901216,DOXY_ADJUSTED corrected based on the WOA 2018 ...,PSAT = f(DOXY); PSAT_ADJUSTED = G*PSAT; DOXY_A...
1,4901216,Bad data; not adjustable ...,none ...
2,7901019,...,...
3,3902461,...,...
4,5904485,DOXY_ADJUSTED corrected using continuous in-ai...,PSAT = f(DOXY); PSAT_ADJUSTED = G*PSAT; DOXY_A...
...,...,...,...
2847,4900320,DOXY_ADJUSTED corrected based on the WOA 2018 ...,PSAT = f(DOXY); PSAT_ADJUSTED = G*PSAT; DOXY_A...
2848,4900320,none ...,none ...
2849,4900320,Bad data; not adjustable ...,none ...
2850,6901490,"Adjusted on CTD at deployment, optode simple c...",PPOX_ADJUSTED=OFFSET+(PPOX*SLOPE)*(1+DRIFT/100...


In [13]:
# retain only unique comments 
unique_cal_comments = np.unique(cal_str_table.o2_cal_comment)
wmo_comment_data_table = []
for comm in unique_cal_comments:
    n_wmo_for_comment = len(np.unique(cal_str_table.wmo[cal_str_table.o2_cal_comment==comm]))
    # print(n_wmo_for_comment)

    # Create a sublist for each row and append it to wmo_comment_data_table
    row = [comm, n_wmo_for_comment]
    wmo_comment_data_table.append(row)

    # if len(wmo_comment_data_table)==0:
    #     wmo_comment_data_table = [comm, n_wmo_for_comment]
    # else:
    #     wmo_comment_data_table.append([comm, n_wmo_for_comment])

print(len(wmo_comment_data_table))



236


In [11]:
wmo_comment_data_table

[['                                                                                                                                                                                                                                                                ',
  500],
 ['1-point multiplicative correction using WOD at 1008.49 dbar. The quoted error was computed via comparisons with monthly or annual climatology data, interpolated to float location, depth, and season, from WOA09.                                              ',
  1],
 ['1-point multiplicative correction using WOD at 1269.58 dbar. The quoted error was computed via comparisons with monthly or annual climatology data, interpolated to float location, depth, and season, from WOA09.                                              ',
  1],
 ['1-point multiplicative correction using WOD at 1792.895 dbar. The quoted error was computed via comparisons with monthly or annual climatology data, interpolated to float location, depth, and

### Make into dataframe, add calibration information

In [14]:
df = pd.DataFrame(wmo_comment_data_table)
# csv_file=  'wmo_comment_data_table_R1.csv'

In [243]:
# # Write the list to the CSV file
# with open(csv_file, 'w', newline='') as file:
#     writer = csv.writer(file)
#     writer.writerows(wmo_comment_data_table)

In [15]:
o2_cal = wmo_comment_data_table[100]
print(o2_cal)

#group O2 air cal and no air cal meta groups
if any(substring in o2_cal for substring in air_cal_list):
    o2_air_group = 'air cal'
elif any(substring in o2_cal for substring in noair_cal_combined_list):
    o2_air_group = 'no air cal'
else:
    o2_air_group = 'no cal/bad'
    
o2_air_group

['DOXY_ADJUSTED is estimated from the last valid cycle with DM adjustment, DOXY_ADJUSTED_ERROR is recomputed from a PPOX_ERROR = 13.1 mbar with an increase of 1mbar/year                                                                                         ', 1]


NameError: name 'air_cal_list' is not defined

In [51]:
df.columns

RangeIndex(start=0, stop=2, step=1)

In [32]:
# df2

Unnamed: 0,cal_comment,n
0,...,381
1,1-point multiplicative correction using WOD at...,1
2,1-point multiplicative correction using WOD at...,1
3,1-point multiplicative correction using WOD at...,1
4,1-point multiplicative correction using WOD at...,1
...,...,...
193,"optode multi calibration, adjusted with median...",36
194,"optode multi calibration, adjusted with median...",26
195,"optode multi calibration, adjusted with median...",3
196,"optode multi calibration, adjusted with median...",1


In [34]:
# df2['category']=np.nan
# df2

Unnamed: 0,cal_comment,n,category
0,...,381,
1,1-point multiplicative correction using WOD at...,1,
2,1-point multiplicative correction using WOD at...,1,
3,1-point multiplicative correction using WOD at...,1,
4,1-point multiplicative correction using WOD at...,1,
...,...,...,...
193,"optode multi calibration, adjusted with median...",36,
194,"optode multi calibration, adjusted with median...",26,
195,"optode multi calibration, adjusted with median...",3,
196,"optode multi calibration, adjusted with median...",1,


In [16]:
  #no calibration bad data
bad_cal_list = ['Sensor issue','out of order','Bad data; not adjustable','Biofouling','unadjustable']

#no calibration, reason unspecified
no_cal_list = ['no adjustment','No QC','none','not applicable']

#blank cal
blank_cal = ['                ']


air_cal_list = ['in air', 'in-air']

noair_cal_surf_list = ['World Ocean Atlas', 'woa', 'WOA', 'no in-air', 'no in air', 
                       'climatology','DOXY_QCs are modified during visual check',
                      'Takeshita', 'CTD at deployment', 'Adjustment done on PPOX_DOXY',
                        'DOXY_ADJUSTED is estimated', 'G obtained from DOXY audit', 'DOXY adjusted by gain', 
                        'No adjustment was necessary', 'No significant oxygen drift detected']


noair_cal_subsurf_list = []
# ['1-point multiplicative corr']

noair_cal_funcofdoxy_list = []
# ['Percent saturation corrected as','DOXY_ADJUSTED computed using Ste',
#                         'Oxygen concentration corrected']

noair_cal_unspec_list = []
# ['DOXY_ADJUSTED corrected based','Adjust with WOA monthly','GAIN determined from WOA2013',
#                         'Adjusted with WOA climatology','Adjusted with SAGEO2 based on WO',
#                          'Adjusted with SAGEO2 on WOA','Adjusted with WOA 2018','Takeshita and all, 2013']

noair_cal_withdrift_list = []
# ['Adjustment done on PPOX_DOXY;Tem'] #this is incomplete

noair_cal_combined_list = noair_cal_surf_list+noair_cal_subsurf_list+noair_cal_funcofdoxy_list+noair_cal_unspec_list

In [17]:
df2 = df.rename(columns={0: "cal_comment", 1: "n"})
df2['category']=np.nan


pd.options.mode.copy_on_write = True
for i in range(0,len(df2)):
    o2_cal = df2['cal_comment'][i]
    # print(o2_cal)

    #group O2 air cal and no air cal meta groups
    if any(substring in o2_cal for substring in air_cal_list):
        if any(test_string in o2_cal.lower() for test_string in ['no in air', 'no in-air']):
            o2_air_group = 'no air cal'
        else:
            o2_air_group = 'air cal'
        
    elif any(substring in o2_cal for substring in noair_cal_combined_list):
        o2_air_group = 'no air cal'
    else:
        o2_air_group = 'no cal/bad'

    # print(o2_air_group)
    df2.at[i, 'category']= o2_air_group


In [18]:
o2_cal.lower()

'optode simple calibration, adjusted with ctd at deployment                                                                                                                                                                                                      '

### Save out comments to a .csv to allow later re-reading

In [19]:
csv_file=  'wmo_comment_data_table_R1_2024_12_19.csv'
df2.to_csv('../spreadsheets/' + csv_file)

### Everything below is old

In [21]:
df2

Unnamed: 0,cal_comment,n,category
0,...,500,no cal/bad
1,1-point multiplicative correction using WOD at...,1,no air cal
2,1-point multiplicative correction using WOD at...,1,no air cal
3,1-point multiplicative correction using WOD at...,1,no air cal
4,1-point multiplicative correction using WOD at...,1,no air cal
...,...,...,...
232,"optode multi calibration, adjusted with median...",26,no air cal
233,"optode multi calibration, adjusted with median...",3,no air cal
234,"optode multi calibration, adjusted with median...",1,no air cal
235,"optode simple calibration, adjusted with CTD a...",1,no air cal


In [227]:
o2_offset_data_table = []
len(o2_offset_data_table)

0

In [208]:
o2_cal_full = []
o2_eq_full = []

for idx, o2_ind in enumerate(o2_ind_all):
    if ~np.isnan(o2_ind):
        o2_cal_full.append(sprof_n.SCIENTIFIC_CALIB_COMMENT.values[idx,-1,np.int32(o2_ind)])
        # pd.concat([o2_cal_full]+ sprof_n.SCIENTIFIC_CALIB_COMMENT.values[idx,-1,np.int32(o2_ind)], ignore_index=True)
        o2_eq_full.append(sprof_n.SCIENTIFIC_CALIB_EQUATION.values[idx,-1,np.int32(o2_ind)])

    # print(o2_cal_full)
data_comment_eq = pd.DataFrame({'o2_cal_full': o2_cal_full, 'o2_eq_full': o2_eq_full})
unique_pairs = data_comment_eq.drop_duplicates()

print(unique_pairs)


                                         o2_cal_full  \
0  b'DOXY_ADJUSTED is computed from an adjustment...   

                                          o2_eq_full  
0  b'PPOX_DOXY=f(DOXY), PPOX_DOXY_ADJUSTED=(SLOPE...  


In [202]:
len(o2_cal_full)

30

In [192]:
tt = sprof_n.SCIENTIFIC_CALIB_COMMENT.values[1,-1,np.int32(o2_ind)]

print(tt)

b'DOXY_ADJUSTED is computed from an adjustment of in water PSAT or PPOX float data at surface by comparison to woaPSAT climatology or woaPPOX{woaPSAT,floatTEMP,floatPSAL} at 1 atm, DOXY_ADJUSTED_ERROR is computed from a PPOX_ERROR of 10.0 mbar +1mb/year     '


In [144]:
p = 1
cal_str = sprof_n.STATION_PARAMETERS.values.astype(str)[p]
print(cal_str)

['PRES                                                            '
 'TEMP                                                            '
 'PSAL                                                            '
 'DOXY                                                            '
 'DOWN_IRRADIANCE380                                              '
 'DOWN_IRRADIANCE412                                              '
 'DOWN_IRRADIANCE490                                              '
 'DOWNWELLING_PAR                                                 '
 'CHLA                                                            '
 'BBP700                                                          '
 'CDOM                                                            '
 'PH_IN_SITU_TOTAL                                                ']


In [97]:
o2_cal_full = sprof_n.SCIENTIFIC_CALIB_COMMENT.values[:,-1,o2_ind]
o2_eq_full = sprof_n.SCIENTIFIC_CALIB_EQUATION.values[:,-1,o2_ind]

data_comment_eq = pd.DataFrame({'o2_cal_full': o2_cal_full, 'o2_eq_full': o2_eq_full})

# Drop duplicate rows to get unique pairs
unique_pairs = data_comment_eq.drop_duplicates()

print(unique_pairs.o2_cal_full[0])

b'DOXY_ADJUSTED corrected using continuous in-air measurements as in Johnson et al. (2015)                                                                                                                                                                        '


In [90]:
# o2_eq = sprof_n.SCIENTIFIC_CALIB_EQUATION.values[0,-1,o2_ind].decode("utf-8")
# 
o2_eq = np.unique(sprof_n.SCIENTIFIC_CALIB_EQUATION.values[:,-1,o2_ind])
print(o2_eq)
print(o2_cal)

[b'PSAT = f(DOXY); PSAT_ADJUSTED = G*PSAT; DOXY_ADJUSTED = f(PSAT_ADJUSTED)                                                                                                                                                                                        '
 b'none                                                                                                                                                                                                                                                            ']
[b'Bad data; not adjustable                                                                                                                                                                                                                                        '
 b'DOXY_ADJUSTED corrected using continuous in-air measurements as in Johnson et al. (2015)                                                                                                                             

In [61]:
# cal_str_table = pd.DataFrame(columns=['wmo','o2_cal_comment'])

#also load meta file  for same float
file_n = argo_path + file
meta_n = xr.open_dataset(argo_path + file)
# meta_n
parameters_n = meta_n.PARAMETER.values
# print(parameters_n)

contains_doxy = any(parameter.decode('utf-8').strip() == 'DOXY' for parameter in parameters_n)
if contains_doxy is False:
    # continue
    print('no doxy')

# not sure if i need this, but loading the Sprof file as well

wmo_n = int((re.search(r'\d+', file)).group())

sprof_n = xr.open_dataset(argo_path + str(wmo_n) + '_Sprof.nc')

# gets order of sensors to extract calibration comments
cal_str = sprof_n.STATION_PARAMETERS.values.astype(str)[0]
# print(cal_str)

for i, param in enumerate(cal_str):
    if 'DOXY' in param:
        o2_ind = i
        break
# print(i)
o2_cal_full = sprof_n.SCIENTIFIC_CALIB_COMMENT.values[:,-1,o2_ind]
o2_cal_unique = np.unique(o2_cal_full)

# print(o2_cal_unique)
# might have multiple unique comments, so save out each one:
for i in range(len(o2_cal_unique)):
    o2_cal_i = o2_cal_unique[i].decode("utf-8")
    print(o2_cal_i)
    # save with wmo only for now
    new_data_cal_info = pd.DataFrame({'wmo': [wmo_n],
                                      'o2_cal_comment': [o2_cal_i]})
    # Append the new_data_cal_info DataFrame to cal_str_table
    cal_str_table = cal_str_table.append(new_data_cal_info, ignore_index=True)


                                                                                                                                                                                                                                                                


  cal_str_table = cal_str_table.append(new_data_cal_info, ignore_index=True)


In [38]:
cal_str.

array(['PRES                                                            ',
       'TEMP                                                            ',
       'PSAL                                                            ',
       'DOXY                                                            '],
      dtype='<U64')

[b'                                                                                                                                                                                                                                                                ']
                                                                                                                                                                                                                                                                


In [19]:
parameters_n = meta_n.PARAMETER.values
print(parameters_n)

contains_doxy = any(parameter.decode('utf-8').strip() == 'DOXY' for parameter in parameters_n)
contains_doxy


[b'PRES                                                            '
 b'TEMP                                                            '
 b'PSAL                                                            '
 b'C1PHASE_DOXY                                                    '
 b'C2PHASE_DOXY                                                    '
 b'DOXY                                                            '
 b'TEMP_DOXY                                                       '
 b'PPOX_DOXY                                                       ']


True