In [1]:
import os
import timeit
from datetime import timedelta

import cdflib
import humanize
import matplotlib.pyplot as plt
import pycdfpp
import requests
from spacepy import pycdf

In [78]:
%%bash
# Download the three benchmark CDF files into /tmp/cdfpp.
# `set -euo pipefail` stops the script at the first failing command, and
# `curl --fail` turns an HTTP error response into a non-zero exit instead of
# saving the server's error page under a .cdf file name.
set -euo pipefail

mirror="https://hephaistos.lpp.polytechnique.fr/data/mirrors/CDF/test_files"
target_dir="/tmp/cdfpp"
mkdir -p "${target_dir}"

for name in \
    po_h9_pwi_1997010103_v01.cdf \
    bigcdf_compressed.cdf \
    mms1_scm_srvy_l2_scsrvy_20190301_v2.2.0.cdf
do
    curl --fail --output "${target_dir}/${name}" "${mirror}/${name}"
done

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  304M  100  304M    0     0   111M      0  0:00:02  0:00:02 --:--:--  111M
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 2420k  100 2420k    0     0  60.3M      0 --:--:-- --:--:-- --:--:-- 60.6M
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 50.4M  100 50.4M    0     0  98.2M      0 --:--:-- --:--:-- --:--:-- 98.1M


In [2]:
# Files exercised by the per-file benchmark loops; the paths are the same
# /tmp/cdfpp targets that the download cell writes to.
cdf_corpus = (
    "/tmp/cdfpp/po_h9_pwi_1997010103_v01.cdf",
    "/tmp/cdfpp/bigcdf_compressed.cdf",
    "/tmp/cdfpp/mms1_scm_srvy_l2_scsrvy_20190301_v2.2.0.cdf",
)

In [3]:
class CdfLib:
    """Timing workloads implemented with ``cdflib``.

    Each static method performs one operation on a CDF file and returns
    ``None``; the computed value is only bound to a throwaway local because
    the methods exist to be timed, not to hand results back.
    """

    @staticmethod
    def just_open(fname):
        """Construct a ``cdflib.CDF`` object for ``fname``."""
        _reader = cdflib.CDF(fname)

    @staticmethod
    def list_variables(fname):
        """Concatenate the ``rVariables`` and ``zVariables`` lists from ``cdf_info()``."""
        reader = cdflib.CDF(fname)
        info = reader.cdf_info()
        _names = info.rVariables + info.zVariables

    @staticmethod
    def get_var_shape(fname, varname):
        """Read ``Dim_Sizes`` from the ``varinq`` record of ``varname``."""
        reader = cdflib.CDF(fname)
        inquiry = reader.varinq(varname)
        _dims = inquiry.Dim_Sizes

    @staticmethod
    def get_var_data(fname, varname, ntimes=1):
        """Construct the reader once, then call ``varget(varname)`` ``ntimes`` times."""
        reader = cdflib.CDF(fname)
        for _ in range(ntimes):
            _data = reader.varget(varname)


class SpacePy:
    """Timing workloads implemented with SpacePy's ``pycdf``.

    Each static method performs one operation on a CDF file and returns
    ``None``; the computed value is only bound to a throwaway local because
    the methods exist to be timed, not to hand results back.
    """

    @staticmethod
    def just_open(fname):
        """Construct a ``pycdf.CDF`` object for ``fname``."""
        _archive = pycdf.CDF(fname)

    @staticmethod
    def list_variables(fname):
        """Collect what iterating a ``pycdf.CDF`` object yields into a list."""
        archive = pycdf.CDF(fname)
        _names = list(archive)

    @staticmethod
    def get_var_shape(fname, varname):
        """Read the ``shape`` attribute of the entry stored under ``varname``."""
        archive = pycdf.CDF(fname)
        _shape = archive[varname].shape

    @staticmethod
    def get_var_data(fname, varname, ntimes=1):
        """Construct the CDF object once, then take a full slice of ``varname`` ``ntimes`` times."""
        archive = pycdf.CDF(fname)
        for _ in range(ntimes):
            _data = archive[varname][:]


class PyCdfPp:
    """Timing workloads implemented with ``pycdfpp``.

    Each static method performs one operation on a CDF file and returns
    ``None``; the computed value is only bound to a throwaway local because
    the methods exist to be timed, not to hand results back.
    """

    @staticmethod
    def just_open(fname):
        """Call ``pycdfpp.load`` on ``fname``."""
        _loaded = pycdfpp.load(fname)

    @staticmethod
    def list_variables(fname):
        """Collect what iterating the loaded object yields into a list."""
        loaded = pycdfpp.load(fname)
        _names = list(loaded)

    @staticmethod
    def get_var_shape(fname, varname):
        """Read the ``shape`` attribute of the entry stored under ``varname``."""
        loaded = pycdfpp.load(fname)
        _shape = loaded[varname].shape

    @staticmethod
    def get_var_data(fname, varname, ntimes=1):
        """Load the file once, then read ``.values`` of ``varname`` ``ntimes`` times."""
        loaded = pycdfpp.load(fname)
        for _ in range(ntimes):
            _data = loaded[varname].values

In [4]:
def bench_method(method, cdf_file, number=1000, libs=None, **kwargs):
    """Time one wrapper method across several CDF library wrappers.

    Args:
        method: Name of the static method to look up on every wrapper class
            (for example ``"just_open"``).
        cdf_file: Path passed as the first positional argument to the method.
        number: How many times ``timeit`` calls the method per library; must
            be at least 1.
        libs: Iterable of wrapper classes to benchmark. Defaults to
            ``(CdfLib, SpacePy, PyCdfPp)``.
        **kwargs: Extra keyword arguments forwarded to the method on each call.

    Returns:
        dict mapping each class's ``__name__`` to its mean time per call
        (total ``timeit`` time divided by ``number``), ordered from fastest
        to slowest.

    Raises:
        ValueError: If ``number`` is smaller than 1, which would otherwise
            surface as a ``ZeroDivisionError`` when computing the mean.
    """
    if number < 1:
        raise ValueError(f"number must be a positive integer, got {number!r}")
    if libs is None:
        # Resolved at call time so the default wrappers are only required
        # when the caller does not supply their own.
        libs = (CdfLib, SpacePy, PyCdfPp)

    timings = []
    for lib in libs:
        # Look the method up once, outside the timed callable, so attribute
        # lookup is not part of what gets measured.
        target = getattr(lib, method)
        elapsed = timeit.timeit(lambda: target(cdf_file, **kwargs), number=number)
        timings.append((lib.__name__, elapsed / number))

    # sorted() is stable, so ties keep the order in which libs were given.
    return dict(sorted(timings, key=lambda item: item[1]))

In [5]:
# Time `just_open` on every corpus file (100 calls per library) and print the
# per-library mean time per call, fastest first.
for file in cdf_corpus:
    print(f"{file}: {bench_method('just_open', file, number=100)}")

/tmp/cdfpp/po_h9_pwi_1997010103_v01.cdf: {'CdfLib': 5.571011919528246e-05, 'PyCdfPp': 0.0001119840401224792, 'SpacePy': 0.0007014661887660622}
/tmp/cdfpp/bigcdf_compressed.cdf: {'PyCdfPp': 1.360954949632287e-05, 'CdfLib': 3.652547020465136e-05, 'SpacePy': 0.0031634629890322685}
/tmp/cdfpp/mms1_scm_srvy_l2_scsrvy_20190301_v2.2.0.cdf: {'CdfLib': 3.510304028168321e-05, 'PyCdfPp': 6.58831000328064e-05, 'SpacePy': 0.06256008113035932}


In [6]:
# Time `list_variables` on every corpus file (100 calls per library) and print
# the per-library mean time per call, fastest first.
for file in cdf_corpus:
    print(f"{file}: {bench_method('list_variables', file, number=100)}")

/tmp/cdfpp/po_h9_pwi_1997010103_v01.cdf: {'PyCdfPp': 0.0001295154611580074, 'CdfLib': 0.0002565167797729373, 'SpacePy': 0.0007644712389446795}
/tmp/cdfpp/bigcdf_compressed.cdf: {'PyCdfPp': 2.1287400741130114e-05, 'CdfLib': 4.239337053149939e-05, 'SpacePy': 0.0031737738801166414}
/tmp/cdfpp/mms1_scm_srvy_l2_scsrvy_20190301_v2.2.0.cdf: {'PyCdfPp': 7.636548951268197e-05, 'CdfLib': 0.00025656206998974084, 'SpacePy': 0.062447749180719256}


In [13]:
# Time the shape lookup of WBR_Elec; each of the 10000 calls per library
# re-opens the file because `get_var_shape` builds a fresh reader every time.
bench_method(
    "get_var_shape",
    "/tmp/cdfpp/po_h9_pwi_1997010103_v01.cdf",
    varname="WBR_Elec",
    number=10000,
)

{'CdfLib': 7.38048373023048e-05,
 'PyCdfPp': 0.00011609705919399858,
 'SpacePy': 0.0006526564202038571}

In [14]:
# Time a single read of WBR_Elec per call (ntimes defaults to 1), 10 calls
# per library.
bench_method(
    "get_var_data",
    "/tmp/cdfpp/po_h9_pwi_1997010103_v01.cdf",
    varname="WBR_Elec",
    number=10,
)

{'PyCdfPp': 0.3304803824983537,
 'SpacePy': 2.016764428804163,
 'CdfLib': 2.0822215156047603}

In [15]:
# Same read, but ntimes=3 fetches the variable three times from one opened
# object inside each timed call (2 calls per library).
bench_method(
    "get_var_data",
    "/tmp/cdfpp/po_h9_pwi_1997010103_v01.cdf",
    varname="WBR_Elec",
    ntimes=3,
    number=2,
)

{'PyCdfPp': 0.3278890200308524,
 'SpacePy': 6.021094444964547,
 'CdfLib': 6.262833864486311}

In [35]:
# File used by the interactive inspection cells that follow.
fname = "/tmp/cdfpp/po_h9_pwi_1997010103_v01.cdf"

In [36]:
# Bind `c` to a cdflib reader for `fname`.
c = cdflib.CDF(fname)

In [40]:
# Display the rVariables list reported by cdflib for this file.
c.cdf_info().rVariables

['Epoch',
 'Delta_T',
 'Sample_rate',
 'FilterMode',
 'DATA_MODE',
 'WBR_Ant',
 'Gain1',
 'Gain2',
 'Translation',
 'NumPoints',
 'DATA_QUALITY',
 'WBR_Elec',
 'WBR_Mag',
 'Num_Snap',
 'FFT_size',
 'Data_No']

In [43]:
# Display cdflib's variable inquiry record for WBR_Mag.
c.varinq("WBR_Mag")

VDRInfo(Variable='WBR_Mag', Num=12, Var_Type='rVariable', Data_Type=21, Data_Type_Description='CDF_REAL4', Num_Elements=1, Num_Dims=1, Dim_Sizes=[3984], Sparse='No_sparse', Last_Rec=-1, Rec_Vary=False, Dim_Vary=[True], Compress=1, Pad=array([-1.e+31], dtype=float32), Block_Factor=1)

In [44]:
# Rebinds `c` (previously the cdflib reader) to a SpacePy pycdf object for the
# same file; cells that use `c` depend on which of the two assignments ran last.
c = pycdf.CDF(fname)

In [47]:
# Display the shape pycdf reports for WBR_Mag (requires `c` to be the pycdf object).
c["WBR_Mag"].shape

(3984,)

In [56]:
# Load the same file with pycdfpp; the cell output is the returned object's repr.
pycdfpp.load(fname)

CDF:
version: 2.6.2
majority: row

Attributes:
	Rules_of_use: " "
	Acknowledgement: " "
	HTTP_LINK: [ [ "http://www-pw.physics.uiowa.edu/plasma-wave/istp/polar/home.html", "http:/www-pw.physics.uiowa.edu/plasma-wave/istp/polar/interpretation.html" ] ]
	TEXT_supplement_1: " "
	Time_resolution: "1.0/Sample_rate"
	Generated_by: " "
	Mission_group: "Polar"
	TITLE: "POLAR PWI WBR"
	Project: "ISTP>International Solar-Terrestrial Physics"
	Discipline: "Space Physics>Magnetospheric Science"
	Instrument_type: "Radio and Plasma Waves (space)"
	MODS: "Created Dec 2003"
	LINK_TEXT: " "
	LINK_TITLE: " "
	Generation_date: "20 April, 2004"
	Data_type: "H9>High Resolution Data"
	Logical_source_description: "Polar Plasma Wave Instrument, High Frequency Waveform Receiver"
	Descriptor: "PWI>Plasma Wave Instrument"
	Data_version: "1"
	Logical_file_id: "PO_H9_PWI_1996032614_V01"
	TEXT: [ [ "Reference:..Gurnett, D.A. et al, The Polar plasma wave instrument, Space Science Reviews, Vol. 71, pp. 597-622, 1995.