In [1]:
import argparse
import datetime
import json
import os
import random
import time
import warnings
from pathlib import Path
from pprint import pprint

import dask
import netCDF4 as nc4
import numpy as np
import pandas as pd
import xarray as xr
from dask import delayed
from pandas import read_csv

warnings.filterwarnings('ignore')

In [2]:
# Directory holding the ECCO V4r4 metadata files.  The original absolute path
# is kept as the default; set the ECCO_V4R4_METADATA_DIR environment variable
# to point the notebook at a different checkout without editing this cell.
ecco_V4r4_metadata_dir = Path(
    os.environ.get(
        'ECCO_V4R4_METADATA_DIR',
        '/home/ifenty/git_repos_others/ECCO-GROUP/ECCO-ACCESS/metadata/ECCOv4r4_metadata_json',
    )
)

# CSV table of PO.DAAC dataset metadata, located inside the metadata directory.
dataset_metadata_fname = ecco_V4r4_metadata_dir / 'PODAAC_datasets-revised_20210226.5.csv'

In [3]:
# Read the PO.DAAC dataset metadata CSV into a DataFrame.
podaac_dataset_table = pd.read_csv(dataset_metadata_fname)

In [4]:
# Show the persistent-ID column of the dataset table.
persistent_ids = podaac_dataset_table['DATASET.PERSISTENT_ID']
pprint(persistent_ids)

0     PODAAC-ECG5D-ATM44
1     PODAAC-ECG5M-ATM44
2     PODAAC-ECL5D-ATM44
3     PODAAC-ECL5M-ATM44
4     PODAAC-ECG5D-BOL44
             ...        
74    PODAAC-ECL5S-OTS44
75    PODAAC-ECL5A-GRD44
76    PODAAC-ECG5A-GRD44
77    PODAAC-ECL5A-MIX44
78    PODAAC-ECG5A-MIX44
Name: DATASET.PERSISTENT_ID, Length: 79, dtype: object


In [5]:
# Strip the 'PODAAC-' prefix from every persistent ID to get the short dataset
# ID (e.g. 'PODAAC-ECG5D-ATM44' -> 'ECG5D-ATM44').  A comprehension keeps the
# loop variable local, so this cell no longer rebinds the builtin `id` at
# notebook scope.
ds_id = [
    persistent_id.split('PODAAC-')[1]
    for persistent_id in podaac_dataset_table['DATASET.PERSISTENT_ID']
]
pprint(ds_id)

['ECG5D-ATM44',
 'ECG5M-ATM44',
 'ECL5D-ATM44',
 'ECL5M-ATM44',
 'ECG5D-BOL44',
 'ECG5M-BOL44',
 'ECL5D-BOL44',
 'ECL5M-BOL44',
 'ECG5D-ODE44',
 'ECG5M-ODE44',
 'ECL5D-ODE44',
 'ECL5M-ODE44',
 'ECG5D-FRE44',
 'ECG5M-FRE44',
 'ECL5D-FRE44',
 'ECL5M-FRE44',
 'ECTSS-MAP44',
 'ECTSD-MSL44',
 'ECTSM-MSL44',
 'ECG5D-HEA44',
 'ECG5M-HEA44',
 'ECL5D-HEA44',
 'ECL5M-HEA44',
 'ECG5D-OML44',
 'ECG5M-OML44',
 'ECL5D-OML44',
 'ECL5M-OML44',
 'ECG5D-OBP44',
 'ECG5M-OBP44',
 'ECL5D-OBP44',
 'ECL5M-OBP44',
 'ECL5S-OBP44',
 'ECL5D-3MT44',
 'ECL5M-3MT44',
 'ECL5D-3SF44',
 'ECL5M-3SF44',
 'ECL5D-3TF44',
 'ECL5M-3TF44',
 'ECL5D-3VF44',
 'ECL5M-3VF44',
 'ECL5D-STF44',
 'ECL5M-STF44',
 'ECG5D-OVE44',
 'ECG5M-OVE44',
 'ECL5D-OVE44',
 'ECL5M-OVE44',
 'ECTSS-SBO44',
 'ECG5D-ICO44',
 'ECG5M-ICO44',
 'ECL5D-ICO44',
 'ECL5M-ICO44',
 'ECL5S-ICO44',
 'ECL5D-SIH44',
 'ECL5M-SIH44',
 'ECL5D-ISP44',
 'ECL5M-ISP44',
 'ECG5D-SIV44',
 'ECG5M-SIV44',
 'ECL5D-SIV44',
 'ECL5M-SIV44',
 'ECL5S-SIV44',
 'ECG5D-SSH44',
 'ECG5M-

In [6]:
# Dataset short names, in table row order.
ds_shortname = podaac_dataset_table['DATASET.SHORT_NAME'].tolist()

In [7]:
# Dataset descriptions (the raw summaries), in table row order.
ds_summary = podaac_dataset_table['DATASET.DESCRIPTION'].tolist()

In [8]:
# Dataset long names, used as titles, in table row order.
ds_title = podaac_dataset_table['DATASET.LONG_NAME'].tolist()

In [9]:
# Dataset filename entries, in table row order.
ds_fname = podaac_dataset_table['DATASET.FILENAME'].tolist()

In [10]:
# Shared boilerplate paragraph appended to every dataset summary.  It is
# written as adjacent string literals (joined by the parser into one string)
# so each sentence or clause sits on its own line; the trailing space inside
# each piece is significant.
summary_second_part = (
    'Estimating the Circulation and Climate of the Ocean (ECCO) state estimates are dynamically and kinematically-consistent reconstructions of the three-dimensional, time-evolving ocean, sea-ice, and surface atmospheric states. '
    'ECCO V4r4 is a free-running solution of a global, nominally 1-degree configuration of the MIT general circulation model (MITgcm) that has been fit to observations in a least-squares sense. '
    'Observational data constraints used in V4r4 include sea surface height (SSH) from satellite altimeters [ERS-1/2, TOPEX/Poseidon, GFO, ENVISAT, Jason-1,2,3, CryoSat-2, and SARAL/AltiKa]; '
    'sea surface temperature (SST) from satellite radiometers [AVHRR], sea surface salinity (SSS) from the Aquarius satellite radiometer/scatterometer, ocean bottom pressure (OBP) from the GRACE satellite gravimeter; '
    'sea-ice concentration from satellite radiometers [SSM/I and SSMIS], and in-situ ocean temperature and salinity measured with conductivity-temperature-depth (CTD) sensors and expendable bathythermographs (XBTs) from several programs [e.g., WOCE, GO-SHIP, Argo, and others] and platforms [e.g., research vessels, gliders, moorings, ice-tethered profilers, and instrumented pinnipeds]. '
    'V4r4 covers the period 1992-01-01T12:00:00 to 2018-01-01T00:00:00.'
)
print(summary_second_part)

Estimating the Circulation and Climate of the Ocean (ECCO) state estimates are dynamically and kinematically-consistent reconstructions of the three-dimensional, time-evolving ocean, sea-ice, and surface atmospheric states. ECCO V4r4 is a free-running solution of a global, nominally 1-degree configuration of the MIT general circulation model (MITgcm) that has been fit to observations in a least-squares sense. Observational data constraints used in V4r4 include sea surface height (SSH) from satellite altimeters [ERS-1/2, TOPEX/Poseidon, GFO, ENVISAT, Jason-1,2,3, CryoSat-2, and SARAL/AltiKa]; sea surface temperature (SST) from satellite radiometers [AVHRR], sea surface salinity (SSS) from the Aquarius satellite radiometer/scatterometer, ocean bottom pressure (OBP) from the GRACE satellite gravimeter; sea-ice concentration from satellite radiometers [SSM/I and SSMIS], and in-situ ocean temperature and salinity measured with conductivity-temperature-depth (CTD) sensors and expendable bath

In [11]:
# Assemble one metadata record per dataset ID, rebuilding each summary as the
# dataset-specific first part followed by the shared ECCO paragraph.
# NOTE: the loop variable `id` shadows the builtin, but cells further down
# read it after the loop finishes, so the name is kept.
ecco_marker = 'Estimating the Circulation and Climate of the Ocean'
ds_summary_fix = {}
for idi, id in enumerate(ds_id):
    print(idi, id, ds_title[idi][0:75])
    # Keep only the text that precedes the ECCO paragraph in the raw summary.
    first_part = ds_summary[idi].split(ecco_marker)[0]
    ds_summary_fix[id] = {
        'title': ds_title[idi],
        'shortname': ds_shortname[idi],
        'summary_first_part': first_part,
        'summary': first_part + summary_second_part,
        'filename': ds_fname[idi],
    }

0 ECG5D-ATM44 ECCO Atmosphere Surface Temperature, Humidity, Wind, and Pressure - Daily M
1 ECG5M-ATM44 ECCO Atmosphere Surface Temperature, Humidity, Wind, and Pressure - Monthly
2 ECL5D-ATM44 ECCO Atmosphere Surface Temperature, Humidity, Wind, and Pressure - Daily M
3 ECL5M-ATM44 ECCO Atmosphere Surface Temperature, Humidity, Wind, and Pressure - Monthly
4 ECG5D-BOL44 ECCO Gent-McWilliams Ocean Bolus Velocity - Daily Mean 0.5 Degree (Version 
5 ECG5M-BOL44 ECCO Gent-McWilliams Ocean Bolus Velocity - Monthly Mean 0.5 Degree (Versio
6 ECL5D-BOL44 ECCO Gent-McWilliams Ocean Bolus Velocity - Daily Mean llc90 Grid (Version 
7 ECL5M-BOL44 ECCO Gent-McWilliams Ocean Bolus Velocity - Monthly Mean llc90 Grid (Versio
8 ECG5D-ODE44 ECCO Ocean Density, Stratification, and Hydrostatic Pressure - Daily Mean 0
9 ECG5M-ODE44 ECCO Ocean Density, Stratification, and Hydrostatic Pressure - Monthly Mean
10 ECL5D-ODE44 ECCO Ocean Density, Stratification, and Hydrostatic Pressure - Daily Mean l
11 ECL5M-

In [12]:
# Spot-check the dataset-specific summary text of the last dataset.  The record
# is looked up via ds_id[-1] rather than a loop variable left over from an
# earlier cell, so the result does not depend on which loop ran last.
ds_summary_fix[ds_id[-1]]['summary_first_part']

'This dataset provides 3D time-invariant coefficients for the Gent-McWilliams and Redi parameterizations and background vertical diffusivity interpolated to a regular 0.5-degree grid from the ECCO Version 4 Release 4 (V4r4) ocean and sea-ice state estimate. '

In [13]:
# Spot-check the full rebuilt summary of the last dataset.  The record is
# looked up via ds_id[-1] rather than a loop variable left over from an
# earlier cell, so the result does not depend on which loop ran last.
ds_summary_fix[ds_id[-1]]['summary']

'This dataset provides 3D time-invariant coefficients for the Gent-McWilliams and Redi parameterizations and background vertical diffusivity interpolated to a regular 0.5-degree grid from the ECCO Version 4 Release 4 (V4r4) ocean and sea-ice state estimate. Estimating the Circulation and Climate of the Ocean (ECCO) state estimates are dynamically and kinematically-consistent reconstructions of the three-dimensional, time-evolving ocean, sea-ice, and surface atmospheric states. ECCO V4r4 is a free-running solution of a global, nominally 1-degree configuration of the MIT general circulation model (MITgcm) that has been fit to observations in a least-squares sense. Observational data constraints used in V4r4 include sea surface height (SSH) from satellite altimeters [ERS-1/2, TOPEX/Poseidon, GFO, ENVISAT, Jason-1,2,3, CryoSat-2, and SARAL/AltiKa]; sea surface temperature (SST) from satellite radiometers [AVHRR], sea surface salinity (SSS) from the Aquarius satellite radiometer/scatteromet

In [14]:
# Print a short report for every dataset: position and ID, short name, title,
# a separator rule, then the dataset-specific first part of the summary.
for idi, (id, entry) in enumerate(ds_summary_fix.items()):
    print(f'\n{idi:02}: {id}')
    print('   ', entry['shortname'])
    print('   ', entry['title'])

    print('---------------------------------------------------------------------------------------------------')
    print(entry['summary_first_part'], '\n')


00: ECG5D-ATM44
    ECCO_L4_ATM_STATE_05DEG_DAILY_V4R4
    ECCO Atmosphere Surface Temperature, Humidity, Wind, and Pressure - Daily Mean 0.5 Degree (Version 4 Release 4)
---------------------------------------------------------------------------------------------------
This dataset provides daily-averaged atmosphere surface temperature, humidity, wind, and pressure interpolated to a regular 0.5-degree grid from the ECCO Version 4 Release 4 (V4r4) ocean and sea-ice state estimate.  


01: ECG5M-ATM44
    ECCO_L4_ATM_STATE_05DEG_MONTHLY_V4R4
    ECCO Atmosphere Surface Temperature, Humidity, Wind, and Pressure - Monthly Mean 0.5 Degree (Version 4 Release 4)
---------------------------------------------------------------------------------------------------
This dataset provides monthly-averaged atmosphere surface temperature, humidity, wind, and pressure interpolated to a regular 0.5-degree grid from the ECCO Version 4 Release 4 (V4r4) ocean and sea-ice state estimate.  


02: ECL5D-ATM

In [15]:
# Display the complete mapping of dataset ID -> metadata record.
ds_summary_fix

{'ECG5D-ATM44': {'title': 'ECCO Atmosphere Surface Temperature, Humidity, Wind, and Pressure - Daily Mean 0.5 Degree (Version 4 Release 4)',
  'shortname': 'ECCO_L4_ATM_STATE_05DEG_DAILY_V4R4',
  'summary_first_part': 'This dataset provides daily-averaged atmosphere surface temperature, humidity, wind, and pressure interpolated to a regular 0.5-degree grid from the ECCO Version 4 Release 4 (V4r4) ocean and sea-ice state estimate. ',
  'summary': 'This dataset provides daily-averaged atmosphere surface temperature, humidity, wind, and pressure interpolated to a regular 0.5-degree grid from the ECCO Version 4 Release 4 (V4r4) ocean and sea-ice state estimate. Estimating the Circulation and Climate of the Ocean (ECCO) state estimates are dynamically and kinematically-consistent reconstructions of the three-dimensional, time-evolving ocean, sea-ice, and surface atmospheric states. ECCO V4r4 is a free-running solution of a global, nominally 1-degree configuration of the MIT general circulat

In [16]:
# The JSON summary is written into the same metadata directory the CSV was
# read from; reuse that path instead of repeating the absolute path literal so
# the two locations cannot drift apart.
output_dir = ecco_V4r4_metadata_dir

In [17]:
# Serialize the assembled dataset records to a JSON file with 4-space
# indentation in the output directory.
summary_json_path = output_dir / 'ECCOv4r4_dataset_summary.json'
with open(summary_json_path, 'w') as outfile:
    json.dump(ds_summary_fix, outfile, indent=4)