In [3]:
import argparse
import dask
import json
import netCDF4 as nc4
import numpy as np
import pandas as pd
from pathlib import Path
from pprint import pprint
import time
import warnings
import xarray as xr

warnings.filterwarnings('ignore')

In [6]:
def load_ecco_fields(data_dir, glob_name):
    time_start=time.time()

    ecco_fields = []
    # opening 312 monthly mean files takes about 40s using parallel & dask
    
    ecco_files = list(data_dir.glob(glob_name))
    print(ecco_files[0:5])
    ecco_fields = xr.open_mfdataset(ecco_files, parallel=True, data_vars='minimal',\
                                  coords='minimal',compat='override')
    
    tt = time.time() - time_start    
    print(tt / len(ecco_fields))
    print(time.time() - time_start)
    return ecco_fields

In [21]:
def get_groupings(base_dir, grid_type, time_avg):
    groupings = dict()
    tmp = Path(f'{base_dir}/{grid_type}/{time_avg}')
    print(tmp)
    if tmp.exists():
        for pi, p in enumerate(tmp.iterdir()):
            grouping = str(p).split('/')[-1]
            groupings[pi] = dict()
            groupings[pi]['name'] = grouping
            groupings[pi]['grid'] = grid_type
            groupings[pi]['time_avg'] = time_avg
            groupings[pi]['directory'] = p
            
    return groupings

In [88]:
def calc_valid_minmax(ecco_fields):
    t0 = time.time()
    results_da = dict()
    for dv in ecco_fields.data_vars:
        print(dv)
        results_da[dv] = dict()
        results_da[dv]['valid_max'] = ecco_fields[dv].max()
        results_da[dv]['valid_min'] = ecco_fields[dv].min()

    results_da_compute = dask.compute(results_da)[0]
    delta_time = time.time()-t0
    
    DAs = []
    for dv in ecco_fields.data_vars:
        print(dv)
        valid_max = results[dv]['valid_max'].values
        valid_min = results[dv]['valid_min'].values
        print(valid_max, valid_min)
        tmp = xr.DataArray([valid_min, valid_max], dims=['valid_min_max'])
        tmp.name = dv
        DAs.append(tmp)

    DS = xr.merge(DAs)
    DS.attrs['title']     = ecco_fields.attrs['title']
    DS.attrs['name']      = groupings[gi]['name']
    DS.attrs['grid']      = groupings[gi]['grid']
    DS.attrs['time_avg']  = groupings[gi]['time_avg']
    DS.attrs['id']        = ecco_fields.attrs['id']
    DS.attrs['shortname'] = ecco_fields.attrs['id'].split('/')[1]
    DS.attrs['directory'] = str(groupings[gi]['directory'])
    DS.attrs['calc_time_seconds'] = delta_time
    
    return DS

## Inputs

In [89]:
gi = 0;

In [90]:
dataset_base_dir = Path('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/')

In [91]:
grids = ['native','latlon']
time_avgs = ['day_inst', 'day_mean','mon_mean']

## Calc

In [92]:
groupings = get_groupings(dataset_base_dir, grids[0], time_avgs[2])
groupings

/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean


{0: {'name': 'ATM_SURFACE_TEMP_HUM_WIND_PRES',
  'grid': 'native',
  'time_avg': 'mon_mean',
  'directory': PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean/ATM_SURFACE_TEMP_HUM_WIND_PRES')},
 1: {'name': 'OCEAN_3D_MOMENTUM_TEND',
  'grid': 'native',
  'time_avg': 'mon_mean',
  'directory': PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean/OCEAN_3D_MOMENTUM_TEND')},
 2: {'name': 'OCEAN_3D_SALINITY_FLUX',
  'grid': 'native',
  'time_avg': 'mon_mean',
  'directory': PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean/OCEAN_3D_SALINITY_FLUX')},
 3: {'name': 'OCEAN_3D_TEMPERATURE_FLUX',
  'grid': 'native',
  'time_avg': 'mon_mean',
  'directory': PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean/OCEAN_3D_TEMPERATURE_FLUX')},
 4: {'name': 'OCEAN_3D_VOLUME_FLUX',
  'grid': 'native',
  'time_avg': 'mon_mean',
  'directory': PosixPath('/home/ifenty/ian1/ifenty/

In [93]:
ecco_fields = load_ecco_fields(groupings[gi]['directory'], '*ECCO*nc')

[PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean/ATM_SURFACE_TEMP_HUM_WIND_PRES/ATM_SURFACE_TEMP_HUM_WIND_PRES_mon_mean_2009-06_ECCO_V4r4_native_llc0090.nc'), PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean/ATM_SURFACE_TEMP_HUM_WIND_PRES/ATM_SURFACE_TEMP_HUM_WIND_PRES_mon_mean_2004-01_ECCO_V4r4_native_llc0090.nc'), PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean/ATM_SURFACE_TEMP_HUM_WIND_PRES/ATM_SURFACE_TEMP_HUM_WIND_PRES_mon_mean_1994-10_ECCO_V4r4_native_llc0090.nc'), PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean/ATM_SURFACE_TEMP_HUM_WIND_PRES/ATM_SURFACE_TEMP_HUM_WIND_PRES_mon_mean_1995-01_ECCO_V4r4_native_llc0090.nc'), PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean/ATM_SURFACE_TEMP_HUM_WIND_PRES/ATM_SURFACE_TEMP_HUM_WIND_PRES_mon_mean_2016-03_ECCO_V4r4_native_llc0090.nc')]
4.140118360519409
24.84088802

In [94]:
DS = calc_valid_minmax(ecco_fields)

EXFatemp
EXFaqh
EXFuwind
EXFvwind
EXFwspee
EXFpress
EXFatemp
310.3392 206.26051
EXFaqh
0.027018907 -0.0005956685
EXFuwind
14.148304 -15.277663
EXFvwind
14.113276 -12.577562
EXFwspee
17.286043 0.8969294
EXFpress
104061.52 95818.04


In [95]:
pprint(DS.attrs)

{'calc_time_seconds': 40.376078367233276,
 'directory': '/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean/ATM_SURFACE_TEMP_HUM_WIND_PRES',
 'grid': 'native',
 'id': '10.5067/ECL5M-ATM44',
 'name': 'ATM_SURFACE_TEMP_HUM_WIND_PRES',
 'shortname': 'ECL5M-ATM44',
 'time_avg': 'mon_mean',
 'title': 'ECCO Atmosphere Surface Temperature, Humidity, Wind, and Pressure - '
          'Monthly Mean llc90 Grid (Version 4 Release 4)'}


In [96]:
DS

In [97]:
filename = f"valid_minmax_{DS.attrs['name']}_{DS.attrs['grid']}_{DS.attrs['time_avg']}_{DS.attrs['shortname']}.nc"
filename

'valid_minmax_ATM_SURFACE_TEMP_HUM_WIND_PRES_native_mon_mean_ECL5M-ATM44.nc'

In [98]:
output_dir = Path('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/valid_minmax')
if not output_dir.exists():
    output_dir.mkdir()

In [99]:
DS.to_netcdf(output_dir / filename)