In [7]:
import os

os.environ["PYISTP_CDFLIB"] = 'pycdfpp'
#os.environ["PYISTP_CDFLIB"]='spacepy'
import pyistp
from pyistp import _impl as pyistp_impl
import requests
from glob import glob
import zstd
import tempfile
import tarfile
import io
import logging
from tqdm import tqdm

def clean_logs(source='output/result.log', destination='output/result_clean.log'):
    """Copy a log file, collapsing runs of consecutive 'Loading' lines.

    Within every run of adjacent lines that start with ``'Loading'``, only the
    last line of the run is kept. Every other line is copied unchanged and in
    its original order.

    Args:
        source: Path of the log file to read.
        destination: Path of the cleaned log to write; it is overwritten if it
            already exists.

    Raises:
        OSError: If ``source`` cannot be opened for reading or ``destination``
            cannot be opened for writing.
    """
    # Context managers close both files deterministically instead of leaving
    # the handles to the garbage collector.
    with open(source) as log_file:
        lines = log_file.readlines()

    # A 'Loading' line is dropped exactly when the line right after it is also
    # a 'Loading' line. Pairing each line with its successor does this in a
    # single pass; the final line has no successor and is always kept.
    successors = lines[1:] + [None]
    kept_lines = [
        line
        for line, following in zip(lines, successors)
        if not (
            line.startswith('Loading')
            and following is not None
            and following.startswith('Loading')
        )
    ]

    with open(destination, 'w') as clean_file:
        clean_file.writelines(kept_lines)

def _fetch_istp_file(url):
    """Download ``url`` and load the response body with ``pyistp.load``."""
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of handing an error page to the
    # CDF loader.
    response.raise_for_status()
    return pyistp.load(buffer=response.content)


def compare_master_and_sample(master, sample_url_part, variable_name):
    """Resolve one variable in a CDAWeb master file and in a sample data file.

    The pyistp logger is redirected to a fresh ``logging.StreamHandler`` (its
    previous handlers are cleared), so whatever pyistp logs while resolving
    the variable appears right after the ``master:`` and ``sample_file:``
    headings printed by this function.

    Args:
        master: File name of the master CDF, appended to the CDAWeb
            ``0MASTERS`` URL.
        sample_url_part: Path of the sample CDF, appended to the CDAWeb
            ``pub/data`` URL.
        variable_name: Name passed to ``data_variable`` on both loaded files.

    Raises:
        requests.HTTPError: If either download returns an HTTP error status.
    """
    pyistp_impl.log.handlers.clear()
    pyistp_impl.log.addHandler(logging.StreamHandler())
    # Bind the loaded master to its own name rather than overwriting the
    # ``master`` argument.
    master_file = _fetch_istp_file(
        f"https://cdaweb.gsfc.nasa.gov/pub/software/cdawlib/0MASTERS/{master}")
    sample_file = _fetch_istp_file(
        f"https://cdaweb.gsfc.nasa.gov/pub/data/{sample_url_part}")
    # The return values are not used; the calls are made for what they log.
    print("master:", flush=True)
    master_file.data_variable(variable_name)
    print("sample_file:", flush=True)
    sample_file.data_variable(variable_name)

# Prepare output folder

In [2]:
# Create the output folder if needed; exist_ok avoids the check-then-create race.
os.makedirs('output', exist_ok=True)

# Detach and close any handlers left from a previous run of this cell before
# touching the log file: a FileHandler that is only dropped from the list keeps
# its file open.
for stale_handler in list(pyistp_impl.log.handlers):
    pyistp_impl.log.removeHandler(stale_handler)
    stale_handler.close()

# Start from an empty log so results of earlier runs are not mixed in.
if os.path.exists('output/result.log'):
    os.remove('output/result.log')

pyistp_impl.log.addHandler(logging.FileHandler("output/result.log"))
pyistp_impl.log.setLevel(logging.DEBUG)

# Download and load all masters as ISTP files

In [3]:

with tempfile.TemporaryDirectory() as tmp_dir:
    archive_response = requests.get('https://hephaistos.lpp.polytechnique.fr/data/jeandet/master.tar.zst')
    # Stop on an HTTP error rather than feeding an error page to the decompressor.
    archive_response.raise_for_status()
    # The context manager closes the archive deterministically once extraction is done.
    with tarfile.open(fileobj=io.BytesIO(zstd.decompress(archive_response.content))) as tar:
        # NOTE(review): extractall() trusts the member paths of a downloaded
        # archive; consider a tarfile extraction filter where the running
        # Python version supports one.
        tar.extractall(tmp_dir)
    cdf_masters = glob(f"{tmp_dir}/*.cdf")
    for master in tqdm(cdf_masters):
        istp_file = pyistp.load(master)
        for vname in istp_file.data_variables():
            # The returned variable is not used; the call is made so that
            # whatever pyistp logs about it ends up in the log.
            istp_file.data_variable(vname)

# Collapse consecutive 'Loading' lines of the log into output/result_clean.log.
clean_logs()

100%|██████████| 3324/3324 [00:21<00:00, 152.98it/s]


# Let's manually check a few files

## c3_cp_efw_l3_e3d_inert
Both the master and the sample file have the same issue.

In [9]:
# Resolve the same variable in the master and in one sample file of this dataset.
compare_master_and_sample(
    master="c3_cp_efw_l3_e3d_inert_00000000_v01.cdf",
    sample_url_part="cluster/c3/efw/efield_3dvect_spinreso_l3_inertialframe/2003/c3_cp_efw_l3_e3d_inert_20030120_v20130803.cdf",
    variable_name="delta_Ez_ISR2__C3_CP_EFW_L3_E3D_INERT",
)

master:


Non compliant ISTP file: variable delta_Ez_ISR2__C3_CP_EFW_L3_E3D_INERT has LABL_PTR_1 attribute which points to variable dEz which does not exist


sample_file:


Non compliant ISTP file: variable delta_Ez_ISR2__C3_CP_EFW_L3_E3D_INERT has LABL_PTR_1 attribute which points to variable dEz which does not exist


## mms4_fpi_fast_l2_dis-momsaux
Both the master and the sample file have the same issue.

In [10]:
# Resolve the same variable in the master and in one sample file of this dataset.
compare_master_and_sample(
    master="mms4_fpi_fast_l2_dis-momsaux_00000000_v01.cdf",
    sample_url_part="mms/mms4/fpi/fast/l2/dis-momsaux/2021/10/mms4_fpi_fast_l2_dis-momsaux_20211003140000_v3.4.0.cdf",
    variable_name="mms4_dis_compressionloss_fast",
)

master:


Non compliant ISTP file: mms4_dis_compressionloss_fast was marked as data variable but it has 0 support variable


sample_file:


Non compliant ISTP file: mms4_dis_compressionloss_fast was marked as data variable but it has 0 support variable


## mvn_swi_l2_finearc3d
Both the master and the sample file have the same issue.

In [11]:
# Resolve the same variable in the master and in one sample file of this dataset.
compare_master_and_sample(
    master="mvn_swi_l2_finearc3d_00000000_v01.cdf",
    sample_url_part="maven/swia/l2/finearc3d/2020/11/mvn_swi_l2_finearc3d_20201125_v02_r00.cdf",
    variable_name="eindex",
)

master:


Non compliant ISTP file: eindex was marked as data variable but it has 0 support variable


sample_file:


Non compliant ISTP file: eindex was marked as data variable but it has 0 support variable
