In [16]:
import speasy as spz
from speasy.core.any_files import list_files
import yaml

In [4]:
# File name (last '/'-separated component of `mastercdf`) of every dataset
# present in speasy's flat CDA inventory.
cda_ws_datasets = {
    dataset.mastercdf.split('/')[-1]
    for dataset in spz.inventories.flat_inventories.cda.datasets.values()
}

In [5]:
# Every file matching the regex in the CDAWeb 0MASTERS directory listing.
# NOTE(review): the second '.' in '.*.cdf' is an unescaped regex wildcard, so
# the pattern is looser than "ends with .cdf" -- confirm this is intended.
available_datasets = set(list_files(
    url="https://cdaweb.gsfc.nasa.gov/pub/software/cdawlib/0MASTERS/",
    file_regex='.*.cdf',
))

In [6]:
# Master CDF files listed on the server that no inventory dataset refers to.
missing_datasets = list(available_datasets.difference(cda_ws_datasets))

In [7]:
# Sizes of (server listing, inventory, difference), displayed as a tuple.
tuple(map(len, (available_datasets, cda_ws_datasets, missing_datasets)))

(4111, 2685, 1426)

In [19]:
# Drop every file whose name contains '_l1' or 'r0' (presumably the low-level
# products -- verify that these substrings do not exclude wanted datasets).
missing_datasets_above_l1 = [
    name for name in missing_datasets
    if '_l1' not in name and 'r0' not in name
]

In [20]:
# Keep only the missing master CDFs whose file name starts with 'mms'.
mms = [name for name in missing_datasets_above_l1 if name.startswith('mms')]

In [21]:
# Display the selected MMS master CDF file names as the cell output.
mms

['mms1_edi_srvy_l2_amb-perp-ob_00000000_v01.cdf',
 'mms2_fpi_fast_l2_des-distaux_00000000_v01.cdf',
 'mms1_edi_srvy_l2_amb-alt-oob_00000000_v01.cdf',
 'mms2_edi_srvy_l2_amb-perp-ob_00000000_v01.cdf',
 'mms2_edi_srvy_l2_amb-alt-cc_00000000_v01.cdf',
 'mms4_edi_srvy_l2_amb-perp-ob_00000000_v01.cdf',
 'mms4_edi_brst_l2_amb-alt-oob_00000000_v01.cdf',
 'mms3_fpi_fast_l2_des-momsaux_00000000_v01.cdf',
 'mms1_feeps_raw_l2_ion_00000000_v01.cdf',
 'mms2_edi_srvy_l2_amb-alt-oob_00000000_v01.cdf',
 'mms3_hpca_brst_l2_tof-counts_00000000_v01.cdf',
 'mms3_fpi_fast_l2_dis-momsaux_00000000_v01.cdf',
 'mms1_fpi_fast_l2_des-distaux_00000000_v01.cdf',
 'mms1_fsm_brst_l3_8khz_00000000_v01.cdf',
 'mms1_edi_srvy_l2_amb-alt-oc_00000000_v01.cdf',
 'mms3_feeps_raw_l2_ion_00000000_v01.cdf',
 'mms1_fpi_fast_l2_dis-momsaux_00000000_v01.cdf',
 'mms4_edi_srvy_l2_amb-alt-oob_00000000_v01.cdf',
 'mms3_edi_srvy_l2_amb-alt-oob_00000000_v01.cdf',
 'mms4_fpi_fast_l2_dis-momsaux_00000000_v01.cdf',
 'mms1_fpi_brst_l2_des-

In [26]:
def make_inventory_entry(master_cdf, mission):
    """Build an archive inventory entry from a master CDF file name.

    The file name is split on '_' and read positionally as
    ``<spacecraft>_<instrument>_<mode>_<level>_..._<date>_<version>.cdf``;
    the last two fields (date placeholder and version) are dropped to obtain
    the dataset's CDF base name.

    Parameters
    ----------
    master_cdf : str
        Master CDF file name, e.g.
        ``'mms1_fpi_fast_l2_des-distaux_00000000_v01.cdf'``.
    mission : str
        Mission directory name used in the data URL and in the inventory
        path, e.g. ``'mms'``.

    Returns
    -------
    tuple
        ``(node, entry)`` where ``node`` is the CDF base name with '-'
        replaced by '_' and ``entry`` is a dict of inventory settings. When
        the mode field is ``'brst'`` (case-insensitive) the entry uses the
        "random" split rule with a monthly frequency and a ``fname_regex``;
        otherwise it uses the "regular" split rule.

    Raises
    ------
    IndexError
        If ``master_cdf`` has fewer than four '_'-separated fields.
    """
    parts = master_cdf.split('_')
    spacecraft = parts[0]
    instrument = parts[1]
    mode = parts[2]
    level = parts[3]
    cdf_name = '_'.join(parts[:-2])
    node = cdf_name.replace('-', '_')

    data_dir = f'https://cdaweb.gsfc.nasa.gov/pub/data/{mission}/{spacecraft}/{instrument}/{mode}/{level}'
    # Fields shared by every entry. The dots of the version/extension part are
    # escaped so the regex only matches real "vX.Y.Z.cdf" names, consistent
    # with the escaping already used in `fname_regex` below.
    entry = {
        'url_pattern': f'{data_dir}/{{Y}}/{{M:02d}}/{cdf_name}_{{Y}}{{M:02d}}{{D:02d}}_v\\d+\\.\\d+\\.\\d+\\.cdf',
        'use_file_list': True,
        'master_cdf': f"https://cdaweb.gsfc.nasa.gov/pub/software/cdawlib/0MASTERS/{master_cdf}",
        'inventory_path': 'cda/' + f'{mission}/{spacecraft}/{instrument}/{mode}'.upper(),
    }

    if mode.lower() == 'brst':
        # Burst-mode entries additionally carry a split frequency and a
        # regex that captures the start stamp and version from file names.
        entry['split_rule'] = "random"
        entry['split_frequency'] = "monthly"
        entry['fname_regex'] = f'{cdf_name}_(?P<start>\\d+)_v(?P<version>[\\d\\.]+)\\.cdf'
    else:
        entry['split_rule'] = "regular"
    return node, entry

In [27]:
# One inventory entry per missing MMS master CDF, keyed by node name.
mms_inv = dict(make_inventory_entry(master_cdf, 'mms') for master_cdf in mms)

# Serialize the entries to the archive YAML file (path is relative to the
# notebook's working directory).
with open('../speasy/data/archive/cda_2.yaml', 'w') as inv_f:
    yaml.dump(mms_inv, inv_f)

In [11]:
# '_'-separated fields of each remaining master CDF file name.
f = [name.split('_') for name in missing_datasets_above_l1]

In [12]:
# Distinct first fields (the part before the first '_') of those file names.
{fields[0] for fields in f}

{'a2',
 'ac',
 'aimp2',
 'alouette1',
 'alouette2',
 'altt2',
 'apollo11',
 'apollo12',
 'apollo14',
 'apollo15',
 'apollo16',
 'bar',
 'c1',
 'c2',
 'c3',
 'c4',
 'cc',
 'ccswe',
 'cips',
 'cl',
 'cnofs',
 'd1',
 'de-2',
 'de1',
 'de2',
 'dmsp',
 'dmsp-f06',
 'dmsp-f07',
 'dmsp-f08',
 'dmsp-f09',
 'dmsp-f12',
 'dmsp-f14',
 'dmsp-f15',
 'dmsp-f16',
 'dmsp-f17',
 'dmsp-f18',
 'dmspf16',
 'dmspf17',
 'dmspf18',
 'endurance',
 'eq',
 'erg',
 'exp35',
 'fa',
 'fast',
 'formosat2',
 'formosat5',
 'g0',
 'g12',
 'g5',
 'g6',
 'galileo',
 'gb',
 'ge',
 'gm',
 'goes12',
 'goes15',
 'gold',
 'gps',
 'hawkeye',
 'hel1',
 'helios1',
 'helios2',
 'i1',
 'i2',
 'i7',
 'i8',
 'ibex',
 'ij',
 'im',
 'image',
 'ir',
 'isee-3-return',
 'isee1',
 'isee2',
 'iss',
 'it',
 'l0',
 'l1',
 'l4',
 'map',
 'mariner2',
 'mess',
 'messenger',
 'mms1',
 'mms2',
 'mms3',
 'mms4',
 'mvn',
 'new',
 'noaa05',
 'noaa06',
 'noaa07',
 'noaa08',
 'noaa10',
 'noaa12',
 'noaa19',
 'ohzora',
 'om',
 'pioneer10',
 'pioneer11

In [29]:
# Spot check: is this master CDF file name among those taken from the inventory?
'mms1_edp_brst_l2_hmfe_00000000_v01.cdf' in cda_ws_datasets

True