# Checksum consistency of DAP Responses across Server Implementations


- Checks that the Thredds data server (TDS) and the Hyrax data server return the same data for the same request from a DAP4 client
- Makes use of a very lightweight file that is currently available via [TDS](https://thredds-test.unidata.ucar.edu/thredds/dap4/dev/d4icomp/restricted/SimpleGroup.nc4) and [Hyrax](http://test.opendap.org/opendap/dap4/SimpleGroup.nc4.h5.dmr) server implementations. 


### requirements
- numpy
- pydap (installed from source: `pip install git+https://github.com/pydap/pydap.git`)
- jupyterlab

In [None]:
# Optional development aid (disabled): uncomment to have IPython reload edited modules automatically
# %load_ext autoreload
# %autoreload 2

In [1]:
from urllib.parse import unquote

import numpy as np
import pydap
from pydap.client import get_cmr_urls, open_url
from pydap.net import create_session

In [2]:
# Record which pydap build produced the outputs shown in this notebook
pydap.__version__

'3.5.7.dev5+g3ccdfe597'

In [3]:
# Create a session with caching enabled (cache named 'debug'), then empty the
# cache so that only the requests issued by this notebook show up later on.
my_session = create_session(
    use_cache=True,
    cache_kwargs={'cache_name': 'debug'},
)
my_session.cache.clear()

In [4]:
# The SimpleGroup test file as served by Hyrax (hyx_url) and by the Thredds data server (tds_url);
# both URLs use the dap4:// scheme.
hyx_url = "dap4://test.opendap.org/opendap/dap4/SimpleGroup.nc4.h5"
tds_url = "dap4://thredds-test.unidata.ucar.edu/thredds/dap4/dev/d4icomp/restricted/SimpleGroup.nc4"

In [5]:
# Open the TDS-hosted dataset through the caching session, with checksums and
# batching turned on, then display its variable tree.
pyds = open_url(
    tds_url,
    session=my_session,
    checksums=True,
    batch=True,
)
pyds.tree()

.SimpleGroup.nc4
├──SimpleGroup
│  ├──Temperature
│  ├──Salinity
│  ├──Y
│  └──X
├──Pressure
├──time_bnds
├──time
└──Z


## this works well

Download and decode a single variable within a DAP response

In [6]:
np.asarray(pyds['time'][:].data) # <--- this is correct: time holds the single value 0.5

array([0.5], dtype=float32)

In [7]:
# List the request URLs now held in the session cache
my_session.cache.urls()

['https://thredds-test.unidata.ucar.edu/thredds/dap4/dev/d4icomp/restricted/SimpleGroup.nc4.dap?dap4.ce=time%5B0%3A1%3A0%5D&dap4.checksum=true',
 'https://thredds-test.unidata.ucar.edu/thredds/dap4/dev/d4icomp/restricted/SimpleGroup.nc4.dmr']

In [8]:
my_session.cache.clear() # clear the cached urls so the next listing only shows new requests

## this also works well

Download/decode 2 variables within same response. The two variables live within the same Group: `SimpleGroup`

In [9]:
# trigger download subset of 2 variables at once
X = pyds['SimpleGroup/X'][:].data;
Salt = pyds['SimpleGroup/Salinity'][0,10:20,10:20].data;

In [10]:
np.asarray(X) # <----- this is correct: X runs from 0 to 39

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39], dtype=int16)

In [11]:
# Salinity subset requested alongside X
np.asarray(Salt) # <----- this is also correct

array([[[30., 30., 30., 30., 30., 30., 30., 30., 30., 30.],
        [30., 30., 30., 30., 30., 30., 30., 30., 30., 30.],
        [30., 30., 30., 30., 30., 30., 30., 30., 30., 30.],
        [30., 30., 30., 30., 30., 30., 30., 30., 30., 30.],
        [30., 30., 30., 30., 30., 30., 30., 30., 30., 30.],
        [30., 30., 30., 30., 30., 30., 30., 30., 30., 30.],
        [30., 30., 30., 30., 30., 30., 30., 30., 30., 30.],
        [30., 30., 30., 30., 30., 30., 30., 30., 30., 30.],
        [30., 30., 30., 30., 30., 30., 30., 30., 30., 30.],
        [30., 30., 30., 30., 30., 30., 30., 30., 30., 30.]]],
      dtype=float32)

In [12]:
# List the cached URLs: both variables were fetched through a single .dap request
my_session.cache.urls()

['https://thredds-test.unidata.ucar.edu/thredds/dap4/dev/d4icomp/restricted/SimpleGroup.nc4.dap?dap4.ce=%2FSimpleGroup%2FSalinity%5B0%3A1%3A0%5D%5B10%3A1%3A19%5D%5B10%3A1%3A19%5D%3B%2FSimpleGroup%2FX%5B0%3A1%3A39%5D&dap4.checksum=true']

In [13]:
# Percent-decode the cached request URL so the DAP4 constraint expression is readable.
# unquote handles every escape, not just the five upper-case ones a manual replace chain would cover.
unquote(my_session.cache.urls()[0])

'https://thredds-test.unidata.ucar.edu/thredds/dap4/dev/d4icomp/restricted/SimpleGroup.nc4.dap?dap4.ce=/SimpleGroup/Salinity[0:1:0][10:1:19][10:1:19];/SimpleGroup/X[0:1:39]&dap4.checksum=true'

## Including variables from different levels of the hierarchy fails!

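Perhaps something about the change in the hierarchy: `time` lives in the root group while `X` lives in `SimpleGroup`. The decoded values below are consistent with the two variables being read in the wrong order: the first four `int16` values of `X` (`0, 16128, -3551, -26846`) are the bytes of `time` (`0.5` as a little-endian `float32`) followed by its CRC32 checksum (`2535649825`), and the remaining values are `X[0:36]`.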

In [14]:
my_session.cache.clear() # clear the cached urls so the next listing only shows the mixed-hierarchy request

In [15]:
# Request one root-level variable and one variable nested inside SimpleGroup.
# No trailing ';' needed: an assignment produces no cell output to suppress.
time = pyds['/time'][:].data
X = pyds['/SimpleGroup/X'][:].data

In [16]:
np.asarray(time) # <------------------------------------- this is no longer correct (expected 0.5)

array([3.581638e-39], dtype=float32)

In [17]:
np.asarray(X) # <------------------------------------- some of the elements are incorrect (expected 0 to 39)

array([     0,  16128,  -3551, -26846,      0,      1,      2,      3,
            4,      5,      6,      7,      8,      9,     10,     11,
           12,     13,     14,     15,     16,     17,     18,     19,
           20,     21,     22,     23,     24,     25,     26,     27,
           28,     29,     30,     31,     32,     33,     34,     35],
      dtype=int16)

In [18]:
# List the cached URLs: the mixed-hierarchy request was sent as a single .dap request
my_session.cache.urls()

['https://thredds-test.unidata.ucar.edu/thredds/dap4/dev/d4icomp/restricted/SimpleGroup.nc4.dap?dap4.ce=%2FSimpleGroup%2FX%5B0%3A1%3A39%5D%3Btime%5B0%3A1%3A0%5D&dap4.checksum=true']

In [19]:
# Percent-decode the cached request URL so the DAP4 constraint expression is readable.
# unquote handles every escape, not just the five upper-case ones a manual replace chain would cover.
unquote(my_session.cache.urls()[0])

'https://thredds-test.unidata.ucar.edu/thredds/dap4/dev/d4icomp/restricted/SimpleGroup.nc4.dap?dap4.ce=/SimpleGroup/X[0:1:39];time[0:1:0]&dap4.checksum=true'

## Let's look at the response

In [20]:
# Re-issue the first cached request through the session to display the raw response content
my_session.get(my_session.cache.urls()[0])

CachedResponse(_content=b'\x04\x00\x04:<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\r\n<Dataset\r\n         name="SimpleGroup.nc4"\r\n         dapVersion="4.0"\r\n         dmrVersion="1.0"\r\n         xmlns="http://xml.opendap.org/ns/DAP/4.0#"\r\n         xmlns:dap="http://xml.opendap.org/ns/DAP/4.0#">\r\n    <Float32 name="time">\r\n        <Dim size="1"/>\r\n        <Attribute name="_DAP4_Checksum_CRC32" type="Int32">\r\n            <Value value="2535649825"/>\r\n        </Attribute>\r\n    </Float32>\r\n    <Attribute name="_NCProperties" type="String">\r\n        <Value value="version=2,netcdf=4.9.2,hdf5=1.14.3"/>\r\n    </Attribute>\r\n    <Attribute name="_dap4.ce" type="String">\r\n        <Value value="/SimpleGroup/X[0:39];/time[0]"/>\r\n    </Attribute>\r\n    <Attribute name="_DAP4_Little_Endian" type="UInt8">\r\n        <Value value="1"/>\r\n    </Attribute>\r\n    <Group name="SimpleGroup">\r\n        <Int16 name="X">\r\n            <Dim size="40"/>\r\n          

# Now Try Same Approach With Hyrax

All scenarios work, including the one below:


In [21]:
my_session.cache.clear() # clear the cached urls before repeating the test against Hyrax

In [22]:
# Open the Hyrax-hosted dataset with the same session and options used for TDS,
# then display its variable tree.
pyds = open_url(
    hyx_url,
    session=my_session,
    checksums=True,
    batch=True,
)
pyds.tree()

.SimpleGroup.nc4.h5
├──SimpleGroup
│  ├──Y
│  ├──X
│  ├──Temperature
│  └──Salinity
├──time
├──Z
├──Pressure
└──time_bnds


In [23]:
# Same mixed-hierarchy request as before, this time against Hyrax:
# one root-level variable and one variable nested inside SimpleGroup.
time = pyds['/time'][:].data
X = pyds['/SimpleGroup/X'][:].data

## Not an issue with Hyrax

In [24]:
# With Hyrax, time decodes to the same value as the single-variable TDS request
np.asarray(time)

array([0.5], dtype=float32)

In [25]:
# With Hyrax, X decodes to the full 0 to 39 sequence
np.asarray(X)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39], dtype=int16)

## take a look at the response from Hyrax


In [26]:
# Percent-decode the cached request URL so the DAP4 constraint expression is readable.
# unquote handles every escape, not just the five upper-case ones a manual replace chain would cover.
unquote(my_session.cache.urls()[0])

'http://test.opendap.org/opendap/dap4/SimpleGroup.nc4.h5.dap?dap4.ce=/SimpleGroup/X[0:1:39];time[0:1:0]&dap4.checksum=true'

In [27]:
# Re-issue the first cached request through the session to display the raw Hyrax response content
my_session.get(my_session.cache.urls()[0])

CachedResponse(_content=b'\x04\x00\x02\x16<?xml version="1.0" encoding="ISO-8859-1"?>\n<Dataset xmlns="http://xml.opendap.org/ns/DAP/4.0#" xml:base="http://test.opendap.org/opendap/dap4/SimpleGroup.nc4.h5" dapVersion="4.0" dmrVersion="1.0" name="SimpleGroup.nc4.h5">\n    <Float32 name="time">\n        <Dim size="1"/>\n    </Float32>\n    <Attribute name="description" type="String">\n        <Value>A simple group for testing.</Value>\n    </Attribute>\n    <Group name="SimpleGroup">\n        <Int16 name="X">\n            <Dim size="40"/>\n        </Int16>\n    </Group>\n</Dataset>\n\r\n\x04\x00\x00\\\x00\x00\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00\x06\x00\x07\x00\x08\x00\t\x00\n\x00\x0b\x00\x0c\x00\r\x00\x0e\x00\x0f\x00\x10\x00\x11\x00\x12\x00\x13\x00\x14\x00\x15\x00\x16\x00\x17\x00\x18\x00\x19\x00\x1a\x00\x1b\x00\x1c\x00\x1d\x00\x1e\x00\x1f\x00 \x00!\x00"\x00#\x00$\x00%\x00&\x00\'\x002\xe6\xa8\xe6\x00\x00\x00?!\xf2"\x97\x05\x00\x00\x00', created_at='2025-09-03 00:27:56.543952+00:00', e