In [1]:
import argparse
import dask
import json
import netCDF4 as nc4
import numpy as np
import pandas as pd
from pathlib import Path
from pprint import pprint
import time
import warnings
import xarray as xr
from dask import delayed
warnings.filterwarnings('ignore')

In [2]:
# Dask setup: silence warnings, point dask's temporary directory at a local
# path, and start a distributed Client whose repr is displayed below.
import warnings
# Suppresses every warning for the rest of the session.
warnings.filterwarnings('ignore')
import dask
# NOTE(review): hard-coded, machine-specific path -- change before running elsewhere.
dask.config.set({'temporary_directory': '/mnt/intraid/ian1/ifenty/dask_tmp'})

# The Client settings below were chosen for the original author's machine.
# Per the original note, starting a Client is not strictly necessary.
from dask.distributed import Client, progress
# Earlier configuration, kept for reference:
#client = Client(processes=False, threads_per_worker=48)
# In-process cluster (processes=False): one worker, 8 threads, 128GB memory limit.
client = Client(processes=False, n_workers=1, threads_per_worker=8,memory_limit='128GB')
client

0,1
Client  Scheduler: inproc://137.78.251.47/171569/1  Dashboard: http://137.78.251.47:8787/status,Cluster  Workers: 1  Cores: 8  Memory: 128.00 GB


In [3]:
def get_groupings(base_dir, grid_type, time_type):
    """Index the entries found under ``base_dir/grid_type/time_type``.

    Parameters
    ----------
    base_dir : str or Path
        Root directory of the dataset tree.
    grid_type : str
        Name of the grid subdirectory.
    time_type : str
        Name of the time-averaging subdirectory.

    Returns
    -------
    dict
        Maps consecutive integers (starting at 0) to a dict with the keys
        ``'name'`` (last path component), ``'grid'``, ``'time_type'`` and
        ``'directory'`` (the entry's ``Path``).  Entries are numbered in
        sorted path order.  The dict is empty when the directory does not
        exist.
    """
    groupings = dict()
    tmp = Path(f'{base_dir}/{grid_type}/{time_type}')
    print(tmp)
    if tmp.exists():
        # iterdir() yields entries in arbitrary, filesystem-dependent order.
        # Sorting makes the integer keys reproducible, which matters because
        # callers select a grouping by its index.
        for pi, p in enumerate(sorted(tmp.iterdir())):
            groupings[pi] = {
                'name': p.name,
                'grid': grid_type,
                'time_type': time_type,
                'directory': p,
            }

    return groupings

In [4]:
@delayed
def load_ecco_file(filename):
    """Read one NetCDF file fully into memory (as a dask delayed task).

    Parameters
    ----------
    filename : Path
        File to open; its ``.name`` is printed as a progress message.

    Returns
    -------
    xarray.Dataset
        The dataset with all of its data loaded into memory.
    """
    print(filename.name)
    # Load inside a context manager so the underlying file handle is closed
    # as soon as the data is in memory; otherwise every processed file would
    # leave an open handle behind.
    with xr.open_dataset(filename) as ecco_field:
        return ecco_field.load()

In [5]:
@delayed
def get_minmax(ecco_field):
    """Compute the minimum and maximum of every data variable (delayed task).

    Parameters
    ----------
    ecco_field : xarray.Dataset
        Dataset whose ``data_vars`` are reduced.

    Returns
    -------
    dict
        ``{variable_name: {'valid_min': ..., 'valid_max': ...}}`` where each
        value is the ``.values`` of the variable's ``min()`` / ``max()``.
    """
    return {
        name: {
            'valid_min': ecco_field[name].min().values,
            'valid_max': ecco_field[name].max().values,
        }
        for name in ecco_field.data_vars
    }

In [7]:
def construct_DS(results, grouping_info, ds_title, ds_id, delta_time):
    """Combine per-record min/max results into one summary Dataset.

    Parameters
    ----------
    results : sequence of dict
        One entry per record, each shaped like
        ``{variable_name: {'valid_min': ..., 'valid_max': ...}}``.
    grouping_info : dict
        Must provide the keys ``'name'``, ``'grid'``, ``'time_type'`` and
        ``'directory'``.
    ds_title : str
        Stored as the ``title`` attribute.
    ds_id : str
        Stored as the ``id`` attribute.  The text after the first ``'/'`` is
        stored as ``shortname``, so ``ds_id`` must contain a ``'/'``.
    delta_time : float
        Stored as the ``calc_time_seconds`` attribute.

    Returns
    -------
    xarray.Dataset
        One variable per data variable, each of length 2 along the
        ``valid_min_max`` dimension: ``[overall_min, overall_max]``.

    Raises
    ------
    ValueError
        If ``results`` is empty.
    """
    if len(results) == 0:
        raise ValueError('results is empty; cannot determine the data variables')

    # Take the variable names from the `results` argument.  The previous code
    # read the notebook-global `results_da_compute` here, which only worked
    # when that global happened to exist and match the argument.
    dvs = list(results[0].keys())

    DAs = []

    # loop through all data variables
    for dv in dvs:
        print(dv)

        # final min max over all records
        valid_min = np.array([r[dv]['valid_min'] for r in results]).min()
        valid_max = np.array([r[dv]['valid_max'] for r in results]).max()

        # construct data array with valid min and max
        tmp = xr.DataArray([valid_min, valid_max], dims=['valid_min_max'])
        tmp.name = dv
        DAs.append(tmp)

    DS = xr.merge(DAs)
    DS.attrs['title']     = ds_title
    DS.attrs['name']      = grouping_info['name']
    DS.attrs['grid']      = grouping_info['grid']
    DS.attrs['time_type'] = grouping_info['time_type']
    DS.attrs['id']        = ds_id
    DS.attrs['shortname'] = ds_id.split('/')[1]
    DS.attrs['directory'] = str(grouping_info['directory'])
    DS.attrs['calc_time_seconds'] = delta_time

    return DS

## Inputs

In [8]:
# Root of the dataset tree; get_groupings() looks beneath it for
# <grid>/<time_type>/<grouping> directories.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
dataset_base_dir = Path('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/')

In [9]:
# Candidate subdirectory names used to build <base>/<grid>/<time_type> paths.
# One entry from each list is selected by index in the "Calc" section.
grids = ['native','latlon']
time_avgs = ['day_inst', 'day_mean','mon_mean']

In [10]:
def f(ecco_files):
    """Build one min/max task per input file.

    Each file is passed through ``load_ecco_file`` and the result is handed
    to ``get_minmax``; both are dask-delayed, so nothing is read or computed
    until the returned list is passed to ``dask.compute``.

    Parameters
    ----------
    ecco_files : iterable
        Files to process, in the order the results should appear.

    Returns
    -------
    list
        One ``get_minmax`` result per file, in input order.
    """
    return [get_minmax(load_ecco_file(path)) for path in ecco_files]

## Calculate valid min/max for one grouping

In [11]:
# Choose which part of the dataset tree to scan:
# time_avgs[2] is 'mon_mean' and grids[0] is 'native'.
time_type = time_avgs[2]
grid_type = grids[0]

# Index every entry under <dataset_base_dir>/<grid_type>/<time_type>.
groupings = get_groupings(dataset_base_dir, grid_type, time_type)

/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean


In [12]:
# Show the index -> grouping-name table so one can be chosen below.
for gi, grouping in groupings.items():
    print(gi, grouping['name'])

0 ATM_SURFACE_TEMP_HUM_WIND_PRES
1 OCEAN_3D_MOMENTUM_TEND
2 OCEAN_3D_SALINITY_FLUX
3 OCEAN_3D_TEMPERATURE_FLUX
4 OCEAN_3D_VOLUME_FLUX
5 OCEAN_AND_ICE_SURFACE_FW_FLUX
6 OCEAN_AND_ICE_SURFACE_HEAT_FLUX
7 OCEAN_AND_ICE_SURFACE_STRESS
8 OCEAN_BOLUS_STREAMFUNCTION
9 OCEAN_BOLUS_VELOCITY
10 OCEAN_BOTTOM_PRESSURE
11 OCEAN_DENS_STRAT_PRESS
12 OCEAN_MIXED_LAYER_DEPTH
13 OCEAN_TEMPERATURE_SALINITY
14 OCEAN_VELOCITY
15 SEA_ICE_CONC_THICKNESS
16 SEA_ICE_HORIZ_VOLUME_FLUX
17 SEA_ICE_SALT_PLUME_FLUX
18 SEA_ICE_VELOCITY
19 SEA_SURFACE_HEIGHT


In [43]:
# Index of the grouping to process (see the table printed above).
# NOTE(review): the index -> name mapping comes from the directory listing
# order in get_groupings(); confirm the printed name is the intended one.
gi = 3
print(groupings[gi])

{'name': 'OCEAN_3D_TEMPERATURE_FLUX', 'grid': 'native', 'time_type': 'mon_mean', 'directory': PosixPath('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/native/mon_mean/OCEAN_3D_TEMPERATURE_FLUX')}


In [44]:
# Collect, in sorted order, every file of the selected grouping that
# matches the glob pattern.
data_dir = groupings[gi]['directory']
glob_name = '*ECCO*nc'
ecco_files = np.sort(list(data_dir.glob(glob_name)))

# Fail fast with a clear message: an empty list would otherwise surface
# later as an opaque IndexError when the first file/result is accessed.
if len(ecco_files) == 0:
    raise FileNotFoundError(f"no files matching '{glob_name}' found in {data_dir}")

In [45]:
# Run the whole delayed task list and record the elapsed wall-clock seconds.
start_time = time.time()
# dask.compute returns a tuple with one entry per argument; unpack the only one.
(results_da_compute,) = dask.compute(f(ecco_files))
delta_time = time.time() - start_time

OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1993-05_ECCO_V4r4_native_llc0090.ncOCEAN_3D_TEMPERATURE_FLUX_mon_mean_2008-02_ECCO_V4r4_native_llc0090.ncOCEAN_3D_TEMPERATURE_FLUX_mon_mean_2005-09_ECCO_V4r4_native_llc0090.ncOCEAN_3D_TEMPERATURE_FLUX_mon_mean_2011-10_ECCO_V4r4_native_llc0090.nc


OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2007-11_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2013-05_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2012-03_ECCO_V4r4_native_llc0090.ncOCEAN_3D_TEMPERATURE_FLUX_mon_mean_1992-01_ECCO_V4r4_native_llc0090.nc


OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2001-09_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2011-09_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2010-07_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2010-09_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2008-09_ECCO_V4r4_native_llc0090.ncOCEAN_3D_TEMPERATURE_FLUX_mon_mean_2004-03_ECCO_V4r4_native_llc0090.nc
OCEAN_3

OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2015-12_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1995-12_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1994-09_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1994-10_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1992-11_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2007-04_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2011-02_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2013-02_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2017-05_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2002-06_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2006-06_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2004-06_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1999-04_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2007-09_ECCO_V4r4_native_llc0090.nc
OCEAN_

OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1993-04_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2003-06_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1996-11_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1999-08_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1999-11_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2008-10_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1995-04_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2017-01_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2017-06_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2014-12_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1998-11_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2015-02_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_2011-04_ECCO_V4r4_native_llc0090.nc
OCEAN_3D_TEMPERATURE_FLUX_mon_mean_1998-10_ECCO_V4r4_native_llc0090.nc
OCEAN_

In [46]:
# Read the dataset-level title and id from the first file.  The context
# manager closes the file once the attributes are read, instead of leaving
# it open for the rest of the session.
with xr.open_dataset(ecco_files[0]) as tmp_file:
    ds_title = tmp_file.attrs['title']
    ds_id = tmp_file.attrs['id']

In [47]:
# Elapsed wall-clock time of the dask.compute call above, in seconds.
print(delta_time)

240.51060318946838


In [48]:
# Reduce the per-file results to one [min, max] pair per variable and
# attach the descriptive attributes; the trailing expression displays it.
DS = construct_DS(
    results=results_da_compute,
    grouping_info=groupings[gi],
    ds_title=ds_title,
    ds_id=ds_id,
    delta_time=delta_time,
)
DS

ADVx_TH
DFxE_TH
ADVy_TH
DFyE_TH
ADVr_TH
DFrE_TH
DFrI_TH


In [42]:
# Output file name assembled from the attributes stored on DS:
# valid_minmax_<name>_<grid>_<time_type>_<shortname>.nc
# NOTE(review): re-run this cell after rebuilding DS so the name matches it.
filename = f"valid_minmax_{DS.attrs['name']}_{DS.attrs['grid']}_{DS.attrs['time_type']}_{DS.attrs['shortname']}.nc"
filename

'valid_minmax_SEA_ICE_VELOCITY_native_mon_mean_ECL5M-SIV44.nc'

In [33]:
# Directory that receives the valid min/max NetCDF files.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
output_dir = Path('/home/ifenty/ian1/ifenty/ECCOv4/Version4/Release4/podaac/valid_minmax_20210311c')
# exist_ok avoids the check-then-create race of `if not exists(): mkdir()`;
# parents=True also creates any missing intermediate directories.
output_dir.mkdir(parents=True, exist_ok=True)

In [34]:
# Write the summary dataset to <output_dir>/<filename> as NetCDF.
DS.to_netcdf(output_dir / filename)