In [1]:
import argparse
import dask
import json
import netCDF4 as nc4
import numpy as np
import pandas as pd
from pathlib import Path
from pprint import pprint
import time
import warnings
import xarray as xr
from dask import delayed
import datetime
import random
warnings.filterwarnings('ignore')

In [2]:
def load_valid_minmax(valid_minmax_dir):
    """Load valid_min/valid_max values from ``valid_minmax*.nc`` files.

    Recursively searches ``valid_minmax_dir`` for files matching
    ``valid_minmax*.nc``. Each file must carry a global ``shortname``
    attribute. For every data variable in a file, element 0 of its values is
    stored as the valid minimum and element 1 as the valid maximum.

    Parameters
    ----------
    valid_minmax_dir : pathlib.Path
        Directory to search; only its ``glob`` method is used.

    Returns
    -------
    dict
        ``{shortname: {variable: {'valid_min': ..., 'valid_max': ...}}}``.
        Empty when no matching file is found.

    Raises
    ------
    KeyError
        If a file has no ``shortname`` global attribute.
    """
    minmax = dict()
    for mmf in valid_minmax_dir.glob('**/valid_minmax*.nc'):
        # The context manager releases the file handle even when a file is
        # malformed; without it every opened dataset stayed open.
        with xr.open_dataset(mmf) as ds:
            shortname = ds.attrs['shortname']

            ranges = dict()
            for dv in ds.data_vars:
                # .values pulls the numbers into memory, so they remain
                # usable after the dataset has been closed.
                vals = ds[dv].values
                ranges[dv] = {'valid_min': vals[0], 'valid_max': vals[1]}
            minmax[shortname] = ranges

    return minmax

In [3]:
# Replacement 'comment' text for selected variables, keyed by variable name.
# 'filename' is a substring that apply_fixes requires in the file's name before
# it writes the corresponding 'comments' text.
comment_fix = {
    var_name: {'filename': filename_part, 'comments': comment_text}
    for var_name, filename_part, comment_text in (
        (
            "oceSPDep",
            "SEA_ICE_SALT_PLUME_FLUX",
            "Depth of parameterized salt plumes formed due to brine rejection during sea-ice formation.",
        ),
        (
            "RHOAnoma",
            "OCEAN_DENS_STRAT_PRESS",
            "In-situ seawater density anomaly relative to the reference density, rhoConst. rhoConst = 1029 kg m-3",
        ),
        (
            "SALT",
            "OCEAN_TEMPERATURE_SALINITY",
            "Defined using CF convention 'Sea water salinity is the salt content of sea water, often on the Practical Salinity Scale of 1978. However, the unqualified term 'salinity' is generic and does not necessarily imply any particular method of calculation. The units of salinity are dimensionless and the units attribute should normally be given as 1e-3 or 0.001 i.e. parts per thousand.' see https://cfconventions.org/Data/cf-standard-names/73/build/cf-standard-name-table.html",
        ),
        (
            "SIsnPrcp",
            "OCEAN_AND_ICE_SURFACE_FW_FLUX",
            "Snow precipitation rate over sea-ice, averaged over the entire model grid cell.",
        ),
    )
}

In [41]:
def _qc_valid_range(name, v, new_valid_min, new_valid_max):
    """Print a QC report for variable ``name``; return True if the new range covers its data."""
    data = v[:]  # read the data once and reuse it for both extrema
    v_min = np.nanmin(data)
    v_max = np.nanmax(data)

    # Tolerate variables that do not carry valid_min/valid_max yet.
    old_valid_min = getattr(v, 'valid_min', None)
    old_valid_max = getattr(v, 'valid_max', None)
    has_old_range = old_valid_min is not None and old_valid_max is not None

    print(f'   QC {name}')
    print(f'   QC current dv actual vmin/vmax : {v_min:.12} {v_max:.12}')
    if has_old_range:
        print(f'   QC old valid_min/max           : {old_valid_min:.12} {old_valid_max:.12}')
    else:
        print('   QC old valid_min/max           : not set')
    print(f'   QC new valid_min/max           : {new_valid_min:.12} {new_valid_max:.12}')

    # A range is correct when valid_min <= actual min and valid_max >= actual
    # max; the printed booleans show whether each of those checks holds.
    if has_old_range:
        if old_valid_min > v_min or old_valid_max < v_max:
            print('   ...old valid min/max was wrong')
            print('   1. old valid min <= vmin', old_valid_min, v_min, old_valid_min <= v_min)
            print('   2. old valid max >= vmax', old_valid_max, v_max, old_valid_max >= v_max)
        else:
            print('   ...old valid min/max was ok')

    if new_valid_min > v_min or new_valid_max < v_max:
        print('   ...new valid min/max is wrong')
        print('   1. new valid min <= vmin', new_valid_min, v_min, new_valid_min <= v_min)
        print('   2. new valid max >= vmax', new_valid_max, v_max, new_valid_max >= v_max)
        return False

    print('   ...new valid min/max is ok')
    return True


def apply_fixes(ecco_filename, minmax, comment_fix, qc_prob):
    """Rewrite metadata attributes of one netCDF file in place.

    The dataset shortname is the second '/'-separated field of the file's
    global ``id`` attribute. For every variable listed under that shortname in
    ``minmax`` and present in the file, the function sets ``valid_min`` and
    ``valid_max``, sets ``units`` to ``'degree_K'`` for ``EXFatemp``, and
    replaces ``comment`` for variables listed in ``comment_fix``. Finally it
    stamps the global ``date_modified`` and ``date_metadata_modified``
    attributes with the current local time.

    All checks run before anything is written, so a return value of 0 means
    the file was left untouched.

    Parameters
    ----------
    ecco_filename : pathlib.Path or str
        File to modify (opened in ``r+`` mode).
    minmax : dict
        ``{shortname: {variable: {'valid_min': ..., 'valid_max': ...}}}``.
    comment_fix : dict
        ``{variable: {'filename': substring, 'comments': text}}``; the
        substring must occur in the file's name for the comment to be applied.
    qc_prob : float
        Probability, per variable, of comparing the new range against the
        variable's actual data before writing.

    Returns
    -------
    int
        1 if the fixes were written. 0 if the shortname is not in ``minmax``,
        a sampled QC check found data outside the new range, or a
        ``comment_fix`` entry does not match the file name.

    Raises
    ------
    Exception
        Errors from opening, reading or writing the file propagate unchanged.
    """
    ecco_filename = Path(ecco_filename)

    with nc4.Dataset(ecco_filename, mode='r+') as tmp_ds:
        shortname = tmp_ds.id.split('/')[1]
        print(shortname)

        if shortname not in minmax:
            # Nothing can be applied, so do not report success.
            print(f"- SKIPPED: no valid min/max for {shortname}, {ecco_filename.name} unchanged\n")
            return 0

        nc_dvs = list(tmp_ds.variables)

        # Pass 1: validate every planned change without touching the file.
        planned = []
        for minmax_dv, limits in minmax[shortname].items():
            if minmax_dv not in nc_dvs:
                print(f'{minmax_dv} not in {nc_dvs}')
                continue

            v = tmp_ds.variables[minmax_dv]
            new_valid_min = limits['valid_min']
            new_valid_max = limits['valid_max']

            # Reading the full array is costly, so only a random sample of
            # variables is checked against its data.
            if random.random() < qc_prob:
                if not _qc_valid_range(minmax_dv, v, new_valid_min, new_valid_max):
                    return 0

            new_comment = None
            if minmax_dv in comment_fix:
                # The comment is only valid for files of the named grouping.
                if comment_fix[minmax_dv]['filename'] not in ecco_filename.name:
                    print("   comment fix: minmax_dv in keys but filename mismatch")
                    return 0
                new_comment = comment_fix[minmax_dv]["comments"]

            planned.append((minmax_dv, v, new_valid_min, new_valid_max, new_comment))

        # Pass 2: every check passed, write the attributes.
        for minmax_dv, v, new_valid_min, new_valid_max, new_comment in planned:
            v.setncattr("valid_min", new_valid_min)
            v.setncattr("valid_max", new_valid_max)

            # fix units on EXFatemp
            if minmax_dv == 'EXFatemp':
                print ("   units fix old:", v.units)
                v.setncattr("units", 'degree_K')
                print ("   units fix new:", v.units)

            # fix comments on select variables
            if new_comment is not None:
                print("   comment fix", minmax_dv, comment_fix[minmax_dv]['filename'])
                v.setncattr("comment", new_comment)

        # update date of modified metadata
        current_time = datetime.datetime.now().isoformat(timespec='seconds')
        tmp_ds.setncattr('date_modified', current_time)
        tmp_ds.setncattr('date_metadata_modified', current_time)

    print(f"+ SUCCESS: changes applied {ecco_filename.name}\n")
    return 1

In [30]:
def f1(ecco_files, minmax, comment_fix, qc_prob):
    """Call ``apply_fixes`` on each file in order and return its results as a list."""
    return [
        apply_fixes(ecco_filename, minmax, comment_fix, qc_prob)
        for ecco_filename in ecco_files
    ]

In [31]:
def get_groupings(base_dir, grid_type, time_type):
    """Describe every entry found under ``<base_dir>/<grid_type>/<time_type>``.

    The resolved directory is printed. Its entries are sorted by path and
    numbered from 0.

    Parameters
    ----------
    base_dir : str or pathlib.Path
        Root directory of the dataset tree.
    grid_type : str
        Sub-directory of ``base_dir``; stored under ``'grid'``.
    time_type : str
        Sub-directory of ``grid_type``; stored under ``'time_type'``.

    Returns
    -------
    dict
        ``{index: {'name', 'grid', 'time_type', 'directory'}}`` where
        ``'name'`` is the entry's final path component and ``'directory'`` its
        ``Path``. Empty when the directory does not exist or is not a
        directory.
    """
    groupings = dict()
    tmp = Path(base_dir) / grid_type / time_type
    print(tmp)
    # is_dir() also covers the case where the path exists but is a regular
    # file, which would make iterdir() raise.
    if tmp.is_dir():
        for pi, p in enumerate(sorted(tmp.iterdir())):
            groupings[pi] = {
                'name': p.name,  # final path component, independent of the OS separator
                'grid': grid_type,
                'time_type': time_type,
                'directory': p,
            }

    return groupings

## Inputs

In [7]:
# Directory names available for the <grid>/<time average> levels of the
# dataset tree; a later cell picks one of each by index.
grids = [
    'native',
    'latlon',
]
time_avgs = [
    'day_inst',
    'day_mean',
    'mon_mean',
]

In [8]:
# Directory that load_valid_minmax searches recursively for valid_minmax*.nc files.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
valid_minmax_dir = Path('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/valid_minmax/valid_minmax_final')

In [9]:
# Build the valid min/max lookup, then show which dataset shortnames it
# covers and how many there are.
minmax = load_valid_minmax(valid_minmax_dir)
minmax_shortnames = minmax.keys()
pprint(minmax_shortnames)
len(minmax_shortnames)

dict_keys(['ECL5S-SSH44', 'ECL5S-ICO44', 'ECL5S-OTS44', 'ECL5S-OBP44', 'ECL5S-SIV44', 'ECL5D-SIH44', 'ECL5D-OBP44', 'ECL5D-ODE44', 'ECL5D-STF44', 'ECL5D-OML44', 'ECL5D-HEA44', 'ECL5D-ICO44', 'ECL5D-STR44', 'ECL5D-3MT44', 'ECL5D-ATM44', 'ECL5D-BOL44', 'ECL5D-OVE44', 'ECL5D-FRE44', 'ECL5D-SSH44', 'ECL5D-3VF44', 'ECL5D-SIV44', 'ECL5D-OTS44', 'ECL5D-3TF44', 'ECL5D-3SF44', 'ECL5D-ISP44', 'ECL5M-OVE44', 'ECL5M-SIH44', 'ECL5M-ATM44', 'ECL5M-BOL44', 'ECL5M-SIV44', 'ECL5M-ICO44', 'ECL5M-ISP44', 'ECL5M-3VF44', 'ECL5M-3SF44', 'ECL5M-HEA44', 'ECL5M-OBP44', 'ECL5M-3TF44', 'ECL5M-SSH44', 'ECL5M-STR44', 'ECL5M-FRE44', 'ECL5M-OTS44', 'ECL5M-ODE44', 'ECL5M-3MT44', 'ECL5M-OML44', 'ECL5M-STF44', 'ECG5D-HEA44', 'ECG5D-SIV44', 'ECG5D-STR44', 'ECG5D-OBP44', 'ECG5D-OTS44', 'ECG5D-ODE44', 'ECG5D-FRE44', 'ECG5D-OML44', 'ECG5D-SSH44', 'ECG5D-OVE44', 'ECG5D-ATM44', 'ECG5D-ICO44', 'ECG5D-BOL44', 'ECG5M-SIV44', 'ECG5M-ICO44', 'ECG5M-OVE44', 'ECG5M-OTS44', 'ECG5M-OBP44', 'ECG5M-OML44', 'ECG5M-BOL44', 'ECG5M-HEA44',

71

In [10]:
# Root of the dataset tree; get_groupings looks under <root>/<grid>/<time average>.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
dataset_base_dir = Path('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac_test/')

In [11]:
# Choose which <grid>/<time average> directory to process by position in the
# `grids` and `time_avgs` lists, then collect the groupings found there.
grid_type = grids[0]
time_type = time_avgs[2]

groupings = get_groupings(
    base_dir=dataset_base_dir,
    grid_type=grid_type,
    time_type=time_type,
)

/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac_test/native/mon_mean


In [12]:
# Print each grouping's index next to its name.
for gi, grouping in groupings.items():
    print(gi, grouping['name'])

0 ATM_SURFACE_TEMP_HUM_WIND_PRES
1 OCEAN_AND_ICE_SURFACE_FW_FLUX
2 OCEAN_AND_ICE_SURFACE_HEAT_FLUX
3 OCEAN_AND_ICE_SURFACE_STRESS
4 OCEAN_BOLUS_VELOCITY
5 OCEAN_BOTTOM_PRESSURE
6 OCEAN_DENS_STRAT_PRESS
7 OCEAN_MIXED_LAYER_DEPTH
8 OCEAN_TEMPERATURE_SALINITY
9 OCEAN_VELOCITY
10 SEA_ICE_CONC_THICKNESS
11 SEA_ICE_VELOCITY
12 SEA_SURFACE_HEIGHT


In [57]:
# Select one grouping by its index in `groupings` and show its details.
gi = 5
grouping_info = groupings[gi]
print(grouping_info)

{'name': 'OCEAN_BOTTOM_PRESSURE', 'grid': 'native', 'time_type': 'mon_mean', 'directory': PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac_test/native/mon_mean/OCEAN_BOTTOM_PRESSURE')}


## Calc: apply the metadata fixes to the selected grouping

In [63]:
# Collect every matching file below the selected grouping directory, sorted
# by path; report the count and show the first file.
glob_name = '**/*ECCO_V4r4*nc'
matched_files = list(grouping_info['directory'].glob(glob_name))
ecco_files = np.sort(matched_files)
print(len(ecco_files))
ecco_files[0]

312


PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac_test/native/mon_mean/OCEAN_BOTTOM_PRESSURE/OCEAN_BOTTOM_PRESSURE_mon_mean_1992-01_ECCO_V4r4_latlon_0p50deg.nc')

In [44]:
# Optional smoke test: set n_test_files to a small integer (e.g. 10) to process
# only the first few files. The default of None keeps the full list, so a
# top-to-bottom run processes every file found by the glob.
n_test_files = None
if n_test_files is not None:
    ecco_files = ecco_files[:n_test_files]

In [64]:
# Per-variable probability that apply_fixes compares the new valid range
# against the variable's actual data before writing it.
qc_prob = 0.01

In [65]:
# Apply the fixes to every selected file and time the run.
start_time = time.time()
f1_out = f1(ecco_files, minmax, comment_fix,qc_prob)
delta_time = time.time() - start_time
# Average seconds per file; NaN instead of a ZeroDivisionError when no file
# matched.
n_processed = len(ecco_files)
time_per = delta_time / n_processed if n_processed else float('nan')


ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1992-01_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1992-02_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1992-03_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1992-04_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1992-05_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1992-06_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1992-07_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1992-08_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1992-09_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applie

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1998-09_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1998-10_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1998-11_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1998-12_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1999-01_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1999-02_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1999-03_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1999-04_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_1999-05_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applie

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2004-10_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2004-11_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2004-12_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2005-01_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2005-02_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2005-03_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2005-04_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2005-05_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2005-06_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applie

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2011-04_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2011-05_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2011-06_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2011-07_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2011-08_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2011-09_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2011-10_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2011-11_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2011-12_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applie

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2017-08_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2017-09_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2017-10_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2017-11_ECCO_V4r4_latlon_0p50deg.nc

ECG5M-OBP44
+ SUCCESS: changes applied OCEAN_BOTTOM_PRESSURE_mon_mean_2017-12_ECCO_V4r4_latlon_0p50deg.nc



In [66]:
# Distinct return codes collected from apply_fixes over all processed files.
np.unique(f1_out)

array([1])

In [67]:
# Total elapsed seconds, average seconds per file, and that average scaled up
# to 9000 files (presumably the size of a larger run; confirm).
n_files_projected = 9000
print(delta_time)
print(time_per)
print(time_per * n_files_projected)

5.244294881820679
0.016808637441732943
151.2777369755965


In [68]:
# Re-open the first processed file to inspect its metadata. The context
# manager closes the file; ecco_field stays bound afterwards, and the cells
# below only read attributes from it.
with xr.open_dataset(ecco_files[0]) as ecco_field:
    shortname = ecco_field.attrs['id'].split('/')[1]
print(shortname)

ECG5M-OBP44


In [69]:
# For each data variable, show the attributes stored in the file followed by
# the valid min/max entry from the lookup table.
for dv, data_array in ecco_field.data_vars.items():
    print(dv)
    pprint(data_array.attrs)
    print('\n')
    pprint(minmax[shortname][dv])
    print('\n\n')

OBP
{'comment': 'OBP excludes the contribution from global mean atmospheric '
            'pressure and is therefore suitable for comparisons with GRACE '
            'data products. OBP is calculated as follows. First, we calculate '
            'ocean hydrostatic bottom pressure anomaly, PHIBOT, with PHIBOT = '
            'p_b/rhoConst - gH(t), where p_b = model ocean hydrostatic bottom '
            'pressure, rhoConst = reference density (1029 kg m-3), g is '
            'acceleration due to gravity (9.81 m s-2), and H(t) is model depth '
            'at time t. Then, OBP = PHIBOT/g + corrections for i) global mean '
            'steric sea level changes related to density changes in the '
            'Boussinesq volume-conserving model (Greatbatch correction, see '
            'sterGloH) and ii) global mean atmospheric pressure variations. '
            'Use OBP for comparisons with ocean bottom pressure data products '
            'that have been corrected for global mean atmosp

In [70]:
# Side-by-side check per variable: 'ef' is the valid range stored in the file,
# 'mm' is the range from the minmax lookup table.
sn = ecco_field.attrs['id'].split('/')[1]
print(sn)
for dv in ecco_field.data_vars:
    file_attrs = ecco_field[dv].attrs
    print('\t',dv, file_attrs['units'])
    print('\t ef:',file_attrs['valid_min'], file_attrs['valid_max'])
    table_range = minmax[sn][dv]
    print('\t mm:',table_range['valid_min'], table_range['valid_max'])

ECG5M-OBP44
	 OBP m
	 ef: -1.7899188995361328 72.07011413574219
	 mm: -1.7899188995361328 72.07011413574219
	 OBPGMAP m
	 ef: 8.223706245422363 82.08540344238281
	 mm: 8.223706245422363 82.08540344238281


In [71]:
# Display the file's global attributes (these include the date stamps that apply_fixes writes).
ecco_field.attrs

{'acknowledgement': 'This research was carried out by the Jet Propulsion Laboratory, managed by the California Institute of Technology under a contract with the National Aeronautics and Space Administration.',
 'author': 'Ian Fenty and Ou Wang',
 'cdm_data_type': 'Grid',
 'comment': 'Fields provided on a regular lat-lon grid. They have been mapped to the regular lat-lon grid from the original ECCO lat-lon-cap 90 (llc90) native model grid. Ocean bottom pressure given in equivalent water thickness excluding (OBP) and including (OBPGMAP) the contribution from global mean atmospheric pressure.',
 'Conventions': 'CF-1.8, ACDD-1.3',
 'creator_email': 'ecco-group@mit.edu',
 'creator_institution': 'NASA Jet Propulsion Laboratory (JPL)',
 'creator_name': 'ECCO Consortium',
 'creator_type': 'group',
 'creator_url': 'https://ecco-group.org',
 'date_created': '2020-12-18T09:40:36',
 'date_issued': '2020-12-18T09:40:36',
 'geospatial_bounds_crs': 'EPSG:4326',
 'geospatial_lat_max': 90.0,
 'geospati