Skip to content

Commit

Permalink
Add support for multiple filters per variable.
Browse files Browse the repository at this point in the history
re: Unidata#1584

Support has been added for multiple filters per variable.  This
affects a number of components in netcdf. The new APIs are
documented in NUG/filters.md.

The primary changes are:
* A set of new functions is provided (see __include/netcdf_filter.h__).
    - Obtain a list of the filters associated with a variable
    - Obtain the parameters for a specific filter.
* The existing __nc_inq_var_filter__ function now returns info
  about the first defined filter.
* The utilities (ncgen, ncdump, and nccopy) now support
  an extended format for specifying a sequence of filters.
  The general form is __<filter>|<filter>...__.
* The ncdump **_Filter** attribute now dumps a list of all the
  filters associated with a variable using the above new format.
* Filter specifications can now use a filter name instead of number
  for filters known to the netcdf library, which in turn is taken
  from the HDF5 filter registration page.
* New errors are defined: NC_EFILTER and NC_ENOFILTER. The latter
  is returned if an attempt is made to access an unknown filter.
* Internally, the dispatch table has been extended to add a function
  to handle all of the filter functions.
* New, filter-related, tests were added to nc_test4.
* A new plugin was added to the plugins directory to help with testing.

Notes:
1. The shuffle and fletcher32 filters are not part of the multifilter system.

Misc. changes:
1. A debug module was added to libhdf5 to help catch error locations.
  • Loading branch information
DennisHeimbigner committed Feb 16, 2020
1 parent 3bcdb5f commit 44d0dca
Show file tree
Hide file tree
Showing 70 changed files with 3,941 additions and 2,070 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ IF(USE_HDF5 OR ENABLE_NETCDF_4)
SET(HAVE_H5Z_SZIP 1)
SET(SZIP_LIBRARY ${SZIP})
SET(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} ${SZIP})
MESSAGE(STATUS "HDF5 has szip.")
ELSE()
MESSAGE(FATAL_ERROR "HDF5 Requires SZIP, but cannot find libszip or libsz.")
ENDIF()
Expand Down
3 changes: 3 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,14 @@ LIBSRC4_DIR = libsrc4
endif

# Is the user building with HDF5?
# Note that USE_HDF5 does not imply USE_NETCDF4
if USE_HDF5
H5_TEST_DIR = h5_test
LIBHDF5 = libhdf5
if USE_NETCDF4
NC_TEST4 = nc_test4
endif
endif

# Build the dap2 client
if ENABLE_DAP
Expand Down
341 changes: 225 additions & 116 deletions NUG/filters.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release

## 4.7.4 - TBD

* [Enhancement] Support has been added for multiple filters per variable. See [https://github.com/Unidata/netcdf-c/issues/1584].
* [Enhancement] Now nc_inq_var_szip returns 0 for parameter values if szip is not in use for var. See [https://github.com/Unidata/netcdf-c/issues/1618].
* [Enhancement] Now allow parallel I/O with filters, for HDF5-1.10.3 and later. See [https://github.com/Unidata/netcdf-c/issues/1473].
* [Enhancement] Increased default size of cache buffer to 16 MB, from 4 MB. Increased number of slots to 4133. See [https://github.com/Unidata/netcdf-c/issues/1541].
Expand Down
10 changes: 6 additions & 4 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ fi
AC_MSG_RESULT([$enable_netcdf_4])

# Does the user want to use HDF5?
# WARNING: enable_hdf5 does not imply that use_netcdf_4 is enabled
AC_MSG_CHECKING([whether we should build with HDF5])
#enable_hdf5=$enable_netcdf_4
AC_ARG_ENABLE([hdf5], [AS_HELP_STRING([--disable-hdf5],
Expand Down Expand Up @@ -1022,8 +1023,11 @@ if test "x$enable_netcdf_4" = xyes; then
AC_DEFINE([USE_NETCDF4], [1], [if true, build netCDF-4])
fi

# Set defaults
hdf5_parallel=no
hdf5_supports_par_filters=no
enable_szlib=no

if test "x$enable_hdf5" = xyes; then

AC_DEFINE([USE_HDF5], [1], [if true, use HDF5])
Expand Down Expand Up @@ -1082,16 +1086,12 @@ if test "x$enable_hdf5" = xyes; then
# Check whether HDF5 was built with the SZLIB library. If so we
# must be able to link to szip library.
AC_MSG_CHECKING([whether szlib was used when building HDF5])
enable_szlib=no
if test "x$ac_cv_func_H5Z_SZIP" = xyes; then
enable_szlib=yes
AC_DEFINE([USE_SZIP], [1], [if true, compile in szip compression in netCDF-4 variables])
fi
AC_MSG_RESULT([$enable_szlib])

fi
AM_CONDITIONAL(HAS_PAR_FILTERS, [test x$hdf5_supports_par_filters = xyes ])
AC_SUBST([HAS_PAR_FILTERS], [$hdf5_supports_par_filters])

# If the user wants hdf4 built in, check it out.
if test "x$enable_hdf4" = xyes; then
Expand Down Expand Up @@ -1302,6 +1302,7 @@ AM_CONDITIONAL(SHOW_DOXYGEN_TAG_LIST, [test x$enable_doxygen_tasks = xyes])
AM_CONDITIONAL(ENABLE_METADATA_PERF, [test x$enable_metadata_perf = xyes])
AM_CONDITIONAL(ENABLE_BYTERANGE, [test "x$enable_byterange" = xyes])
AM_CONDITIONAL(RELAX_COORD_BOUND, [test "xyes" = xyes])
AM_CONDITIONAL(HAS_PAR_FILTERS, [test x$hdf5_supports_par_filters = xyes ])

# If the machine doesn't have a long long, and we want netCDF-4, then
# we've got problems!
Expand Down Expand Up @@ -1434,6 +1435,7 @@ AC_SUBST(HAS_JNA,[$enable_jna])
AC_SUBST(HAS_ERANGE_FILL,[$enable_erange_fill])
AC_SUBST(HAS_BYTERANGE,[$enable_byterange])
AC_SUBST(RELAX_COORD_BOUND,[yes])
AC_SUBST([HAS_PAR_FILTERS], [$hdf5_supports_par_filters])

# Include some specifics for netcdf on windows.
#AH_VERBATIM([_WIN32_STRICMP],
Expand Down
28 changes: 18 additions & 10 deletions debug/cf
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
#!/bin/bash
#NB=1
DB=1
#DB=1
#X=-x

#ANSI=1
#MEM=1
#MEM=1 # export NC_VLEN_NOTEST=1
#NOUTIL=1
FAST=1
#FAST=1
#PROF=1

#HDF5=1
#DAP=1
#NCZARR=1
HDF5=1
DAP=1
#SZIP=1
#HDF4=1
#PNETCDF=1
#PAR4=1

#TESTSERVERS="localhost:8080,remotetest.unidata.ucar.edu"
#TESTSERVERS="localhost:8080,149.165.169.123:8080"

if test $# != 0 ; then
cmds=$@
Expand All @@ -31,11 +32,12 @@ CFLAGS=""
#CFLAGS="-Wall -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unused-parameter -Wno-char-subscripts -Wno-pointer-sign -Wno-format ${CFLAGS}"
CFLAGS="-Wall -Wno-unused-parameter -Wno-char-subscripts -Wno-pointer-sign ${CFLAGS}"
#CFLAGS="-Wall ${CFLAGS}"
#CFLAGS="-Wconversion"
#CFLAGS="$CFLAGS -Wconversion"
#CFLAGS="-Wall -Wunused-parameter -Wno-char-subscripts -Wno-pointer-sign ${CFLAGS}"

if test "x$MEM" = x1 ; then
export NC_VLEN_NOTEST=1
CFLAGS="-fsanitize=address ${CFLAGS}"
export NC_VLEN_NOTEST=1
CFLAGS="-fsanitize=address ${CFLAGS}"
fi

if test "x$ANSI" = x1 ; then
Expand Down Expand Up @@ -118,18 +120,20 @@ FLAGS="$FLAGS --enable-extreme-numbers"
#FLAGS="$FLAGS --disable-testsets"
#FLAGS="$FLAGS --disable-dap-remote-tests"
#FLAGS="$FLAGS --enable-dap-auth-tests" -- requires a new remotetest server
FLAGS="$FLAGS --enable-doxygen --enable-internal-docs"
#FLAGS="$FLAGS --enable-doxygen --enable-internal-docs"
FLAGS="$FLAGS --enable-logging"
#FLAGS="$FLAGS --disable-diskless"
FLAGS="$FLAGS --enable-mmap"
FLAGS="$FLAGS --enable-byterange"
#FLAGS="$FLAGS --enable-atexit-finalize"
#FLAGS="$FLAGS --with-udunits"
#FLAGS="$FLAGS --with-libcf"
#FLAGS="$FLAGS --enable-jna"
#FLAGS="$FLAGS --disable-properties-attribute"
#FLAGS="$FLAGS --disable-silent-rules"
#FLAGS="$FLAGS --disable-filter-testing"
#FLAGS="$FLAGS --enable-metadata-perf"
#FLAGS="$FLAGS --enable-extra-tests"
#FLAGS="$FLAGS --with-ncproperties-extra=key1=value1,key2=value2"
#FLAGS="$FLAGS --enable-valgrind"

Expand All @@ -141,6 +145,10 @@ if test "x$PAR4" != x1 ; then
FLAGS="$FLAGS --disable-parallel4"
fi

if test "x$NCZARR" = x1 ; then
FLAGS="$FLAGS --enable-nczarr"
fi

if test "x$NOUTIL" = x1 ; then
FLAGS="$FLAGS --disable-utilities"
fi
Expand Down
6 changes: 3 additions & 3 deletions examples/C/filter_example.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ netcdf bzip2 {

#include <hdf5.h>
#include "netcdf.h"
#include "netcdf_filter.h"

/* The HDF assigned id for bzip compression */
#define BZIP2_ID 307
Expand Down Expand Up @@ -169,7 +170,6 @@ test_bzip2(void)
{
int i;
unsigned int level = BZIP2_LEVEL;
unsigned int id=0;
size_t nparams = 0;

printf("\n*** Testing API: bzip2 compression.\n");
Expand Down Expand Up @@ -204,8 +204,8 @@ test_bzip2(void)

/* Read back the compression info and verify it */
level = 0;
CHECK(nc_inq_var_filter(ncid,varid,&id,&nparams,&level));
if(id != BZIP2_ID || nparams != 1 || level != BZIP2_LEVEL) {
CHECK(nc_inq_var_filter_info(ncid,varid,BZIP2_ID,&nparams,&level));
if(nparams != 1 || level != BZIP2_LEVEL) {
printf("test_filter: filter def/inq mismatch\n");
return NC_EFILTER;
}
Expand Down
9 changes: 8 additions & 1 deletion include/hdf5internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ typedef struct NC_HDF5_TYPE_INFO
hid_t native_hdf_typeid;
} NC_HDF5_TYPE_INFO_T;

/* Forward */
struct NC_FILTER_OBJ_HDF5;

/* Logging and debugging. */
void reportopenobjects(int log, hid_t);
int hdf5_set_log_level();
Expand Down Expand Up @@ -171,11 +174,15 @@ int nc4_get_var_meta(NC_VAR_INFO_T *var);


/* Define Filter API Function */
int nc4_filter_action(int action, int formatx, int id, NC_FILTER_INFO* info);
int nc4_global_filter_action(int action, unsigned int id, struct NC_FILTER_OBJ_HDF5* infop);
int NC4_hdf5_addfilter(NC_VAR_INFO_T* var, int active, unsigned int id, size_t nparams, unsigned int* params);
int NC4_hdf5_remove_filter(NC_VAR_INFO_T* var, unsigned int filterid);

/* Support functions for provenance info (defined in nc4hdf.c) */
extern int NC4_hdf5get_libversion(unsigned*,unsigned*,unsigned*);/*libsrc4/nc4hdf.c*/
extern int NC4_hdf5get_superblock(struct NC_FILE_INFO*, int*);/*libsrc4/nc4hdf.c*/
extern int NC4_isnetcdf4(struct NC_FILE_INFO*); /*libsrc4/nc4hdf.c*/

extern int nc4_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var);

#endif /* _HDF5INTERNAL_ */
3 changes: 3 additions & 0 deletions include/nc4dispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,9 @@ extern "C" {
EXTERNL int
NC4_show_metadata(int);

EXTERNL int
NC4_filter_actions(int, int, int, struct NC_Filterobject*);

#if defined(__cplusplus)
}
#endif
Expand Down
62 changes: 56 additions & 6 deletions include/nc4internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdarg.h>
#include <string.h>

#include "ncdimscale.h"
Expand All @@ -27,6 +26,7 @@

#include "netcdf_f.h"
#include "netcdf_mem.h"
#include "netcdf_filter.h"
#ifdef USE_PARALLEL
#include "netcdf_par.h"
#endif /* USE_PARALLEL */
Expand Down Expand Up @@ -126,6 +126,7 @@ typedef enum {NC_FALSE = 0, NC_TRUE = 1} nc_bool_t;
/* Forward declarations of struct tags that are only referenced through
 * pointers in this header. The filter object tag must be spelled
 * NC_Filterobject so that it names the same type used by the dispatch
 * layer's filter_actions entry points. */
struct NC_GRP_INFO;
struct NC_TYPE_INFO;
struct NC_Filterobject;

/**
* This struct provides indexed Access to Meta-data objects. See the
Expand Down Expand Up @@ -212,16 +213,12 @@ typedef struct NC_VAR_INFO
int parallel_access; /**< Type of parallel access for I/O on variable (collective or independent) */
nc_bool_t dimscale; /**< True if var is a dimscale */
nc_bool_t *dimscale_attached; /**< Array of flags that are true if dimscale is attached for that dim index */
nc_bool_t deflate; /**< True if var has deflate filter applied */
int deflate_level; /**< If deflate is true, this is the deflate level, between 0 and 9. */
nc_bool_t shuffle; /**< True if var has shuffle filter applied */
nc_bool_t fletcher32; /**< True if var has fletcher32 filter applied */
size_t chunk_cache_size, chunk_cache_nelems;
float chunk_cache_preemption;
void *format_var_info; /**< Pointer to any binary format info. */
unsigned int filterid; /**< ID for arbitrary filter. */
size_t nparams; /**< nparams for arbitrary filter. */
unsigned int *params; /**< Params for arbitrary filter. */
NClist* filters; /**< List<NC_FILTER_SPEC> */
} NC_VAR_INFO_T;

/** This is a struct to handle the field metadata from a user-defined
Expand Down Expand Up @@ -452,4 +449,57 @@ int log_metadata_nc(NC_FILE_INFO_T *h5);
/* Binary searcher for reserved attributes */
extern const NC_reservedatt *NC_findreserved(const char *name);

/**************************************************/
/* Internal filter related structures */

/* Internal filter action codes.
 * Values 1-6 name the basic actions; the NCFILTER_CLIENT_* actions
 * are numbered separately, starting at 10. */
#define NCFILTER_DEF          1
#define NCFILTER_REMOVE       2
#define NCFILTER_INQ          3
#define NCFILTER_FILTERIDS    4
#define NCFILTER_INFO         5
#define NCFILTER_FREESPEC     6

#define NCFILTER_CLIENT_REG   10
#define NCFILTER_CLIENT_UNREG 11
#define NCFILTER_CLIENT_INQ   12

/* Tag recording which member of the NC_FILTER_OBJ_HDF5 union is in use. */
typedef enum NC_FILTER_SORT {
    NC_FILTER_SORT_SPEC   = 1,
    NC_FILTER_SORT_IDS    = 2,
    NC_FILTER_SORT_CLIENT = 3
} NC_FILTER_SORT;

/* Provide structs to pass args to filter_actions function for HDF5*/

/* Description of a single filter: its id, its parameter list, and
 * whether it has been activated in the HDF5 library. */
typedef struct NC_FILTER_SPEC_HDF5 {
    int           active;   /**< Non-zero iff the HDF5 library was told to activate this filter. */
    unsigned int  filterid; /**< Id of the filter. */
    size_t        nparams;  /**< Number of filter parameters. */
    unsigned int *params;   /**< Filter parameter values. */
} NC_FILTER_SPEC_HDF5;

/* A counted list of filter ids. */
typedef struct NC_FILTERIDS_HDF5 {
    size_t        nfilters;  /**< How many filters are listed. */
    unsigned int *filterids; /**< The filter ids themselves. */
} NC_FILTERIDS_HDF5;

/* Client filter record: a filter id paired with its HDF5 filter info. */
typedef struct NC_FILTER_CLIENT_HDF5 {
    unsigned int id;
    /* HDF5-specific filter info. Held as an untyped pointer so that this
     * header does not have to pull in the HDF5 headers. */
    void *info;
} NC_FILTER_CLIENT_HDF5;

/* HDF5 filter object: a tagged union over the three HDF5 filter
 * argument structs, preceded by the generic filter object header. */
typedef struct NC_FILTER_OBJ_HDF5 {
    /* Must remain the first member so that a pointer to this struct
     * can be cast to a pointer to NC_Filterobject. */
    NC_Filterobject hdr;
    /* Discriminant: tells which member of `u` is in use. */
    NC_FILTER_SORT sort;
    union {
        NC_FILTER_SPEC_HDF5   spec;
        NC_FILTERIDS_HDF5     ids;
        NC_FILTER_CLIENT_HDF5 client;
    } u;
} NC_FILTER_OBJ_HDF5;

extern void NC4_freefilterspec(NC_FILTER_SPEC_HDF5* f);

#endif /* _NC4INTERNAL_ */
1 change: 0 additions & 1 deletion include/ncdispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ struct nc_vlen_t;

struct NC;


int NC_create(const char *path, int cmode,
size_t initialsz, int basepe, size_t *chunksizehintp,
int useparallel, void *parameters, int *ncidp);
Expand Down
14 changes: 8 additions & 6 deletions include/netcdf.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,13 @@ by the desired type. */
#define NC_ERCFILE (-133) /**< RC file failure */
#define NC_ENULLPAD (-134) /**< Header Bytes not Null-Byte padded */
#define NC_EINMEMORY (-135) /**< In-memory file error */
#define NC4_LAST_ERROR (-136) /**< @internal All netCDF errors > this. */
#define NC_ENOFILTER (-136) /**< Filter not defined on variable. */

#define NC4_LAST_ERROR (-137) /**< @internal All netCDF errors > this. */

/* Errors for all remote access methods(e.g. DAP and CDMREMOTE)*/
#define NC_EURL (NC_EDAPURL) /**< Malformed URL */
#define NC_ECONSTRAINT (NC_EDAPCONSTRAINT) /**< Malformed Constraint*/

/** @internal This is used in netCDF-4 files for dimensions without
* coordinate vars. */
Expand All @@ -486,10 +492,6 @@ by the desired type. */
* size_t. Doh! */
#define NC_HAVE_NEW_CHUNKING_API 1

/* Errors for all remote access methods(e.g. DAP and CDMREMOTE)*/
#define NC_EURL (NC_EDAPURL) /**< Malformed URL */
#define NC_ECONSTRAINT (NC_EDAPCONSTRAINT) /**< Malformed Constraint*/

/*
* The Interface
*/
Expand Down Expand Up @@ -900,7 +902,7 @@ nc_inq_var_endian(int ncid, int varid, int *endianp);
EXTERNL int
nc_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams, const unsigned int* parms);

/* Learn about the filter on a variable */
/* Learn about the first filter on a variable */
EXTERNL int
nc_inq_var_filter(int ncid, int varid, unsigned int* idp, size_t* nparams, unsigned int* params);

Expand Down
9 changes: 9 additions & 0 deletions include/netcdf_dispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
#ifndef NETCDF_DISPATCH_H
#define NETCDF_DISPATCH_H

/* Forward */
struct NC_Filterobject;

/* This is the dispatch table, with a pointer to each netCDF
* function. */
struct NC_Dispatch
Expand Down Expand Up @@ -137,6 +140,10 @@ struct NC_Dispatch
int (*set_var_chunk_cache)(int, int, size_t, size_t, float);
int (*get_var_chunk_cache)(int ncid, int varid, size_t *sizep,
size_t *nelemsp, float *preemptionp);

/* Dispatch table Version 2 or later */
/* Handle all filter related actions. */
int (*filter_actions)(int ncid, int varid, int action, struct NC_Filterobject*);
};

#if defined(__cplusplus)
Expand Down Expand Up @@ -219,6 +226,8 @@ extern "C" {
EXTERNL int NC_NOTNC4_inq_user_type(int, nc_type, char *, size_t *,
nc_type *, size_t *, int *);
EXTERNL int NC_NOTNC4_inq_typeid(int, const char *, nc_type *);
EXTERNL int NC_NOTNC4_filter_actions(int, int, int, struct NC_Filterobject*);

#if defined(__cplusplus)
}
#endif
Expand Down

0 comments on commit 44d0dca

Please sign in to comment.