In [1]:
from pathlib import Path
import numpy as np
import yaml
import sys

In [2]:
pipeline_dir = Path('/home/ep/shared/ECCO-pipeline-ep/observations')
conf_dir = Path('/home/ep/shared/ECCO-pipeline-ep/ECCO-pipeline/ecco_pipeline/conf/ds_configs/')

In [3]:
datasets = np.sort(list(pipeline_dir.glob('*')))

In [4]:
datasets

array([PosixPath('/home/ep/shared/ECCO-pipeline-ep/observations/AMSR-2_OSI-408'),
       PosixPath('/home/ep/shared/ECCO-pipeline-ep/observations/AQUARIUS_L3_SSS_SMI_MONTHLY_V5'),
       PosixPath('/home/ep/shared/ECCO-pipeline-ep/observations/ATL20_V004_daily'),
       PosixPath('/home/ep/shared/ECCO-pipeline-ep/observations/ATL20_V004_monthly'),
       PosixPath('/home/ep/shared/ECCO-pipeline-ep/observations/AVHRR_OI-NCEI-L4-GLOB-v2.0'),
       PosixPath('/home/ep/shared/ECCO-pipeline-ep/observations/AVHRR_OI-NCEI-L4-GLOB-v2.1'),
       PosixPath('/home/ep/shared/ECCO-pipeline-ep/observations/G02202_V4'),
       PosixPath('/home/ep/shared/ECCO-pipeline-ep/observations/G10016_V2'),
       PosixPath('/home/ep/shared/ECCO-pipeline-ep/observations/GRC_FGO_GRIDDED_AOD1B_JPL_MASCON_RL06'),
       PosixPath('/home/ep/shared/ECCO-pipeline-ep/observations/L3_DEBIAS_LOCEAN_v8_q09'),
       PosixPath('/home/ep/shared/ECCO-pipeline-ep/observations/L3_DEBIAS_LOCEAN_v8_q18'),
       PosixPath('/ho

In [5]:
yaml_exists = []
yaml_missing = []
yamls = dict()

yaml_missing = []
for ds in datasets:        
    try:
        with open(conf_dir / f'{ds.name}.yaml', 'r') as file:
          yamls[ds.name] = yaml.safe_load(file)
          yaml_exists.append(ds.name)
    except:
        yaml_missing.append(ds.name)

print(f'YAML LOADED: {len(yaml_exists)}')
print('------------')
for y in yaml_exists:
    print(f'  {y}')


print(f'\nYAML MISSING FOR DIRECTORIES: {len(yaml_missing)}')
print('-----------------------------')
for y in yaml_missing:
    print(f'  {y}')

YAML LOADED: 21
------------
  AMSR-2_OSI-408
  AQUARIUS_L3_SSS_SMI_MONTHLY_V5
  ATL20_V004_daily
  ATL20_V004_monthly
  AVHRR_OI-NCEI-L4-GLOB-v2.0
  AVHRR_OI-NCEI-L4-GLOB-v2.1
  G02202_V4
  G10016_V2
  L3_DEBIAS_LOCEAN_v8_q09
  L3_DEBIAS_LOCEAN_v8_q18
  MODIS_AQUA_L3_SST_THERMAL_DAILY_9KM_DAYTIME_V2019.0
  OISSS_L4_multimission_monthly_v2
  RDEFT4
  SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205
  SMAP_RSS_L3_SSS_SMI_MONTHLY_V4
  SSMIS_OSI-430-a_daily
  SSMIS_OSI-430-a_monthly
  SSMIS_OSI-450-a_daily
  SSMIS_OSI-450-a_monthly
  TELLUS_GRAC-GRFO_MASCON_CRI_GRID_RL06.1_V3
  TELLUS_GRFO_L3_CSR_RL06.2_OCN_v04

YAML MISSING FOR DIRECTORIES: 4
-----------------------------
  GRC_FGO_GRIDDED_AOD1B_JPL_MASCON_RL06
  SSMIS_OSI-401-b
  TELLUS_GRAC-GRFO_MASCON_CRI_GRID_RL06_V2
  TELLUS_GRAC_L3_CSR_RL06_OCN_v04


In [6]:
ds_found = dict()

for ds in datasets:
    print('\n')
    print(ds)
    ds_found[ds.name] = dict()
    grids = list((ds / 'transformed_products').glob('*'))
    
    for g in grids:
        ds_found[ds.name][g.name] = dict()
        
        print(f'   {g.name}')
        vars= list((ds / 'transformed_products' / g/ 'aggregated').glob('*'))

        for var in vars:
            ds_found[ds.name][g.name][var.name] = dict()
            print(f'      {var.name}')
            fp = Path(ds / 'transformed_products' / g/ 'aggregated' / var / 'netCDF')
            mon_files =  list(fp.glob(f'*{g.name}_MONTHLY*nc'))
            day_files =  list(fp.glob(f'*{g.name}_DAILY*nc'))

            if len(mon_files) > 0:
                mon_fns = [file.name for file in mon_files]
            else:
                mon_fns = []

            if len(day_files) > 0:
                day_fns = [file.name for file in day_files]            
            else:
               day_fns = []

            ds_found[ds.name][g.name][var.name]['file_path']  = fp
            ds_found[ds.name][g.name][var.name]['mon_files']  = mon_fns
            ds_found[ds.name][g.name][var.name]['day_files']  = day_fns


            #ds_found[ds.name][g.name][var.name] = fncs
            



/home/ep/shared/ECCO-pipeline-ep/observations/AMSR-2_OSI-408
   ECCO_llc90
      ice_conc
      confidence_level
      total_uncertainty
   ECCO_llc270
      ice_conc
      confidence_level
      total_uncertainty
   TPOSE
      ice_conc
      confidence_level
      total_uncertainty


/home/ep/shared/ECCO-pipeline-ep/observations/AQUARIUS_L3_SSS_SMI_MONTHLY_V5
   ECCO_llc90
      SSS
      SSS_sys_unc
   ECCO_llc270
      SSS
      SSS_sys_unc
   TPOSE
      SSS
      SSS_sys_unc


/home/ep/shared/ECCO-pipeline-ep/observations/ATL20_V004_daily


/home/ep/shared/ECCO-pipeline-ep/observations/ATL20_V004_monthly
   ECCO_llc90
      mean_fb
   ECCO_llc270
      mean_fb
   TPOSE
      mean_fb


/home/ep/shared/ECCO-pipeline-ep/observations/AVHRR_OI-NCEI-L4-GLOB-v2.0
   ECCO_llc90
      analysed_sst
      analysis_error
   ECCO_llc270
   TPOSE
      analysed_sst
      analysis_error


/home/ep/shared/ECCO-pipeline-ep/observations/AVHRR_OI-NCEI-L4-GLOB-v2.1
   ECCO_llc90
      analysed_sst

In [7]:
def export_ds_found_dict(ds_found, export_filename=''):

    tmp_stdout= sys.stdout

    if len(export_filename) > 0:
        try:
            sys.stdout = open(export_filename,'wt')
        except:
            print('could not make filename')
            return ''
            
    
    print(f'Datasets in {pipeline_dir}')
    print('===================================================')
    for ds in ds_found.keys():
        print(f'{ds}')
    
    for ds in ds_found.keys():
        print('\n===============================================================')
        if ds in yamls:
            print(f"{ds}\n{yamls[ds]['original_dataset_title']}")
            print(f"doi:{yamls[ds]['original_dataset_doi']}")
        else:
            print(f'{ds}\nYAML not founds in ds_conf')
    
        print(  '===============================================================')
        if len(ds_found[ds].keys()) == 0:
            print ('\nNO DATA!\n')
        
        for g in ds_found[ds].keys():
            print(f'\n  =>> {g} <<= ')
    
            for v in ds_found[ds][g].keys():
                tmp = ds_found[ds][g][v];
    
                t2 = len(tmp['mon_files'])
                if t2  > 0:
                    mfy = tmp['mon_files'][0][-7:-3]
                    mly = tmp['mon_files'][-1][-7:-3]
                    mflys = f'[{mfy}-{mly}]'
                else:
                    mflys = ''
                    
                t3 = len(tmp['day_files'])
                if t3 > 0:
                    dfy = tmp['day_files'][0][-7:-3]
                    dly = tmp['day_files'][-1][-7:-3]
                    dflys = f'[{dfy}-{dly}]'
                else:
                    dflys = ''
                    
                print(f"     * {v.ljust(28)}   MON:{str(len(tmp['mon_files'])).ljust(2)} {mflys}  DAY:{str(len(tmp['day_files'])).ljust(2)} {dflys}")

    sys.stdout = tmp_stdout

In [8]:
export_ds_found_dict(ds_found, export_filename='/home/ifenty/test.txt')

In [9]:
export_ds_found_dict(ds_found)

Datasets in /home/ep/shared/ECCO-pipeline-ep/observations
AMSR-2_OSI-408
AQUARIUS_L3_SSS_SMI_MONTHLY_V5
ATL20_V004_daily
ATL20_V004_monthly
AVHRR_OI-NCEI-L4-GLOB-v2.0
AVHRR_OI-NCEI-L4-GLOB-v2.1
G02202_V4
G10016_V2
GRC_FGO_GRIDDED_AOD1B_JPL_MASCON_RL06
L3_DEBIAS_LOCEAN_v8_q09
L3_DEBIAS_LOCEAN_v8_q18
MODIS_AQUA_L3_SST_THERMAL_DAILY_9KM_DAYTIME_V2019.0
OISSS_L4_multimission_monthly_v2
RDEFT4
SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205
SMAP_RSS_L3_SSS_SMI_MONTHLY_V4
SSMIS_OSI-401-b
SSMIS_OSI-430-a_daily
SSMIS_OSI-430-a_monthly
SSMIS_OSI-450-a_daily
SSMIS_OSI-450-a_monthly
TELLUS_GRAC-GRFO_MASCON_CRI_GRID_RL06.1_V3
TELLUS_GRAC-GRFO_MASCON_CRI_GRID_RL06_V2
TELLUS_GRAC_L3_CSR_RL06_OCN_v04
TELLUS_GRFO_L3_CSR_RL06.2_OCN_v04

AMSR-2_OSI-408
Global Sea Ice Concentration (AMSR-2)
doi:OSI-408

  =>> ECCO_llc90 <<= 
     * ice_conc                       MON:8  [2016-2023]  DAY:8  [2016-2023]
     * confidence_level               MON:8  [2016-2023]  DAY:8  [2016-2023]
     * total_unc