In [None]:
import re
from pathlib import Path

def parse_dod(filepath, dim_names=("time", "y", "x", "bound")):
    """
    Parse DOD-style file into dims, variables, attributes, and globals.

    NOTE:
    - A line starting with `#` is required to mark the beginning of global attributes.
    - Using 4 spaces for variable attributes.
    - Using 2 spaces for global attributes.
    - Tabs will be converted to 4 spaces automatically.

    Parameters
    ----------
    filepath : str or path-like
        Location of the DOD text file.
    dim_names : collection of str, optional
        Keys that an unindented ``key = value`` line must have to be stored
        as a dimension; any other such key is stored as a global. Defaults
        to the previously hard-coded ``time``, ``y``, ``x``, ``bound``.

    Returns
    -------
    dict
        ``{"dimensions": ..., "variables": ..., "globals": ...}`` where
        dimensions map name -> int (``None`` for ``UNLIMITED``), variables
        map name -> ``{"dtype", "dims", "attrs"}`` and globals map
        name -> str.

    Raises
    ------
    ValueError
        If a dimension size is neither ``UNLIMITED`` nor an integer, or a
        comma-separated ``flag_values`` entry is not an integer.
    """
    # `name(dim, ...):dtype`. The lazy group also matches empty parentheses,
    # so scalar declarations such as `base_time():int` need no second pattern.
    var_decl = re.compile(r"^(\w+)\((.*?)\):(\w+)")

    dims, variables, globals_ = {}, {}, {}
    current_var = None
    in_globals = False

    # splitlines() already drops the line terminators.
    for raw in Path(filepath).read_text().splitlines():
        line = raw.replace('\t', '    ')  # normalize tabs to spaces
        stripped = line.strip()
        if not stripped:
            continue

        # Start of global attributes section
        if stripped.startswith("#"):
            in_globals = True
            current_var = None
            continue

        indent = len(line) - len(line.lstrip())

        # 0-space lines before global section: dims or variable declarations
        if not in_globals and indent == 0:
            current_var = None
            if "=" in line and "(" not in line and ":" not in line:
                key, val = map(str.strip, line.split("=", 1))
                if key in dim_names:
                    dims[key] = None if val.upper() == "UNLIMITED" else int(val)
                else:
                    # Stored verbatim: surrounding quotes are kept here,
                    # unlike the 2-space globals handled further down.
                    globals_[key] = val
                continue

            m = var_decl.match(line)
            if m:
                name, dims_str, dtype = m.groups()
                dims_tuple = tuple(d.strip() for d in dims_str.split(",")) if dims_str else ()
                variables[name] = {"dtype": dtype, "dims": dims_tuple, "attrs": {}}
                current_var = name
            # Any other unindented line is ignored; current_var stays None,
            # so attribute lines that follow it are skipped as well.

        # 4-space variable attribute
        elif indent == 4 and current_var:
            if "=" in line:
                ak, av = map(str.strip, line.split("=", 1))
                av = av.strip('"')
                if ',' in av and 'flag_values' in ak:
                    av = tuple(int(x) for x in av.split(','))
                variables[current_var]["attrs"][ak] = av
            else:
                # Bare attribute name without a value
                variables[current_var]["attrs"][stripped] = ""

        # 2-space global attribute after # section marker
        elif in_globals and indent == 2:
            if "=" in line:
                key, val = map(str.strip, line.split("=", 1))
                globals_[key] = val.strip('"')
            else:
                globals_[stripped] = ""

    return {"dimensions": dims, "variables": variables, "globals": globals_}


In [16]:
# Parse the DOD text file into a dict with "dimensions", "variables" and "globals".
# NOTE: absolute, machine-specific path; adjust before running elsewhere.
file_path = "/ccsopen/home/braut/projects/xprecipradarhp_vap_proc/dod.txt"
dod = parse_dod(file_path)

In [17]:
# Show the parsed DOD dictionary as the cell output.
dod

{'dimensions': {'time': None, 'y': 160, 'x': 160, 'bound': 2},
 'variables': {'base_time': {'dtype': 'int',
   'dims': (),
   'attrs': {'string': '',
    'long_name': 'Base time in Epoch',
    'units': 'seconds since 1970-1-1 0:00:00 0:00',
    'ancillary_variables': 'time_offset'}},
  'time_offset': {'dtype': 'double',
   'dims': ('time',),
   'attrs': {'long_name': 'Time offset from base_time',
    'units': '',
    'ancillary_variables': 'base_time'}},
  'time': {'dtype': 'double',
   'dims': ('time',),
   'attrs': {'long_name': 'Time offset from midnight',
    'units': '',
    'standard_name': 'time',
    'string': '',
    'ancillary_variables': 'time_offset'}},
  'corrected_reflectivity': {'dtype': 'float',
   'dims': ('time', 'y', 'x'),
   'attrs': {'long_name': 'Corrected equivalent_reflectivity_factor',
    'units': 'dBZ',
    '_FillValue:float': '-9999',
    'standard_name': 'equivalent_reflectivity_factor',
    'coordinates': 'lat lon',
    'missing_value:float': '-9999'}},
  

In [6]:
def create_nc_from_dod(path, dod):
    """
    Build a NetCDF-4 file from a parsed DOD spec and return the open dataset.

    Attribute keys may carry a type tag (``name:type``, e.g.
    ``_FillValue:float``). The tag is stripped from the attribute name and,
    for numeric tags, the text value is converted to a number. This lets
    ``_FillValue`` reach ``createVariable(fill_value=...)`` instead of being
    written as a string attribute literally named ``_FillValue:float``.

    Parameters
    ----------
    path : str or path-like
        Output file, opened with mode ``"w"``.
    dod : dict
        Mapping with ``"dimensions"`` (name -> size, ``None`` for unlimited),
        ``"globals"`` (name -> value) and ``"variables"``
        (name -> ``{"dtype", "dims", "attrs"}``).

    Returns
    -------
    netCDF4.Dataset
        The dataset, still open; the caller is responsible for closing it.

    Raises
    ------
    Exception
        Any error raised while building the file is re-raised after the
        dataset has been closed, so no handle is leaked.
    """
    from netCDF4 import Dataset

    # Python converters for the type tag in "name:type" attribute keys.
    casters = {
        "float": float, "double": float,
        "int": int, "short": int, "byte": int, "long": int,
    }

    def _untag(key, value):
        """Split a ``name:type`` key and convert text values per the tag."""
        name, sep, tag = key.partition(":")
        if not sep:
            return key, value
        name = name.strip()
        cast = casters.get(tag.strip().lower())
        if cast is None or not isinstance(value, str):
            return name, value
        try:
            converted = [cast(part.strip()) for part in value.split(",")]
        except ValueError:
            # Leave unparseable text untouched rather than abort the build.
            return name, value
        return name, converted[0] if len(converted) == 1 else converted

    ds = Dataset(path, "w", format="NETCDF4")
    try:
        # dimensions
        for dim, size in dod["dimensions"].items():
            ds.createDimension(dim, size)

        # global attributes
        for k, v in dod["globals"].items():
            setattr(ds, k, v)

        # variables
        for name, spec in dod["variables"].items():
            # NOTE(review): dtype strings are passed through unchanged; confirm
            # netCDF4 gives names such as 'float'/'int' the width the DOD intends.
            dtype = spec["dtype"]
            dims = spec["dims"]
            attrs = dict(_untag(k, v) for k, v in spec["attrs"].items())
            fv = attrs.pop("_FillValue", None)

            # The fill value has to be supplied when the variable is created;
            # an empty value is treated as "no fill value given".
            if fv is None or (isinstance(fv, str) and not fv):
                var = ds.createVariable(name, dtype, dims)
            else:
                var = ds.createVariable(name, dtype, dims, fill_value=fv)

            for ak, av in attrs.items():
                setattr(var, ak, av)
    except BaseException:
        # Do not leak the open handle when construction fails part-way.
        ds.close()
        raise

    return ds


In [18]:
# Write the NetCDF-4 file described by `dod`; the returned dataset stays open until closed.
# NOTE: absolute, machine-specific output path; adjust before running elsewhere.
outfile = "/ccsopen/home/braut/projects/xprecipradarhp_vap_proc/test_hp_proc.nc"
nc_ds = create_nc_from_dod(outfile, dod)

In [19]:
# Close the dataset handle returned by create_nc_from_dod.
nc_ds.close()