In [1]:
from space_ouija.ouija_boards.messenger import load_FIPS_EDR
from glob import glob
from datetime import datetime
import numpy as np
import pycdfpp
import re
from multiprocessing import Pool

In [2]:
# Matches an indented <start_date_time> element that ends its line; the named
# group "start" captures the timestamp without the trailing "Z".
START_DT_RE = re.compile(r'\s+<start_date_time>(?P<start>[T\d\:\.\-]+)Z</start_date_time>$', flags=re.MULTILINE)


def start_datetime(dat_file:str):
    """Return the start time stored in the ``.xml`` file that sits next to ``dat_file``.

    The ``.xml`` path is obtained by swapping the final extension of
    ``dat_file``; directory components are left untouched.

    Parameters
    ----------
    dat_file : str
        Path of the data file whose sibling ``.xml`` file is read.

    Returns
    -------
    numpy.datetime64
        The first ``<start_date_time>`` value found, without its "Z" suffix.

    Raises
    ------
    OSError
        If the ``.xml`` file cannot be opened.
    IndexError
        If the ``.xml`` file contains no matching ``<start_date_time>`` element.
    """
    import os  # local import so this cell does not depend on an edit to the imports cell

    # Only the trailing extension is swapped; str.replace() would also rewrite
    # a ".DAT" that happens to appear in a directory name.
    xml_file = os.path.splitext(dat_file)[0] + ".xml"
    # Context manager guarantees the handle is closed even if read() fails.
    with open(xml_file, 'r') as label:
        found = START_DT_RE.search(label.read())
    if found is None:
        # IndexError is kept (it is what the former `findall(...)[0]` raised)
        # so existing handlers keep working, but it now names the culprit file.
        raise IndexError(f"no <start_date_time> element found in {xml_file!r}")
    return np.datetime64(found.group("start"))

def build_time_vect(start_dt, met):
    """Build absolute timestamps from a start time and a vector of counters.

    Each element of ``met`` is turned into an offset from the first element,
    interpreted as whole seconds, and added to ``start_dt``.
    (``met`` is presumably the mission elapsed time of each record - confirm
    against ``load_FIPS_EDR``.)

    Parameters
    ----------
    start_dt : numpy.datetime64
        Timestamp assigned to the first element of ``met``.
    met : array-like
        Counter values; any sequence accepted by ``numpy.asarray``.

    Returns
    -------
    numpy.ndarray
        ``datetime64`` array with the same shape as ``met``; empty when
        ``met`` is empty.
    """
    met = np.asarray(met)
    if met.size == 0:
        # Nothing to offset from: return an empty datetime64 array instead of
        # failing on met[0].
        return start_dt + np.empty(0, dtype='timedelta64[s]')
    if np.issubdtype(met.dtype, np.unsignedinteger):
        # Unsigned subtraction wraps around when a sample is smaller than the
        # first one, which would yield a huge positive offset; go signed first.
        met = met.astype(np.int64)
    return start_dt + (met - met[0]).astype('timedelta64[s]')

# (variable name, CATDESC) for every rate counter exported as a spectrogram.
# The variable name is also the key read from the loaded EDR record.
_FIPS_RATE_VARIABLES = (
    ('start_rate', "Start rate counter sampled at each Deflection System High Voltage (DSHV) step in the scan."),
    ('stop_rate', "Stop rate counter sampled at each DSHV step in the scan."),
    ('valid_evt_rate', "Valid event rate counter sampled at each DSHV step in the scan."),
    ('proton_rate', "Proton rate counter sampled at each DSHV step in the scan."),
    ('evt_proc_rate', "Events processed rate counter sampled at each DSHV step in the scan."),
)


def _cdf_output_path(dat_file):
    """Return the ``.cdf`` path for ``dat_file``: same directory, version tag removed."""
    import os  # local import so this cell does not depend on an edit to the imports cell

    directory, filename = os.path.split(dat_file)
    # Work on the file name only, so a directory that happens to contain
    # ".DAT", "_V1" or "_V2" is never rewritten, and always swap the extension
    # so the output can never be the input file itself.
    stem = os.path.splitext(filename)[0]
    for version_tag in ("_V1", "_V2"):
        stem = stem.replace(version_tag, "")
    return os.path.join(directory, stem + ".cdf")


def to_cdf(dat_file):
    """Convert one FIPS EDR ``.DAT`` file into a CDF file saved next to it.

    The record is loaded with ``load_FIPS_EDR``. An ``Epoch`` variable is built
    from the start time returned by ``start_datetime`` and the record's
    ``met`` field, then ``scantype`` and the five rate counters are added with
    ``Epoch`` as their ``DEPEND_0``. Every variable is gzip-compressed.

    Parameters
    ----------
    dat_file : str
        Path of the ``.DAT`` file to convert.

    Returns
    -------
    None
        The result is written to the ``.DAT`` file's directory under the same
        base name with any ``_V1``/``_V2`` tag removed and a ``.cdf`` extension.
    """
    edr = load_FIPS_EDR(dat_file)
    gzip = pycdfpp.CompressionType.gzip_compression
    cdf = pycdfpp.CDF()

    # Time axis shared by every data variable below.
    cdf.add_variable(
        name='Epoch',
        values=build_time_vect(start_datetime(dat_file), edr["met"]),
        attributes={
            'VAR_TYPE': 'support_data',
        },
        compression=gzip,
    )
    cdf.add_variable(
        name='scantype',
        values=edr['scantype'],
        attributes={
            'DEPEND_0': 'Epoch',
            'VAR_TYPE': 'data',
            'DISPLAY_TYPE': 'time_series',
            'CATDESC': "Indicates the FIPS Scan Mode. =0 Normal Scan =1 High Temp Scan, =2 Burst Scan, =3 Test Scan, =4 Table 4, =5 Table 5, =6 Table 6, =7 Table 7. ",
            'LABLAXIS': 'Scan Type',
        },
        compression=gzip,
    )
    # The rate counters differ only by name and description.
    for rate_name, catdesc in _FIPS_RATE_VARIABLES:
        cdf.add_variable(
            name=rate_name,
            values=edr[rate_name],
            attributes={
                'DEPEND_0': 'Epoch',
                'VAR_TYPE': 'data',
                'DISPLAY_TYPE': 'spectrogram',
                'CATDESC': catdesc,
            },
            compression=gzip,
        )
    pycdfpp.save(cdf, _cdf_output_path(dat_file))

In [3]:
# Directory holding the MESSENGER FIPS EDR files to convert; change this one
# constant to point the notebook at another copy of the data.
FIPS_EDR_DIR = "/home/jeandet/Documents/DATA/Messenger/fips/EDR/data/ALL"
# glob() returns matches in arbitrary order; sorting makes reruns process the
# files in the same order.
dat_files = sorted(glob(FIPS_EDR_DIR + "/*.DAT"))
len(dat_files)

2269

In [4]:
# Convert every file with a pool of 32 worker processes. map() blocks until
# all files have been handled; leaving the with-block shuts the pool down.
with Pool(processes=32) as workers:
    workers.map(to_cdf, dat_files)