Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

165 hdf5 in memory using core driver #173

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 9 additions & 2 deletions setup.py
Expand Up @@ -304,13 +304,16 @@ def get_hdf5_version(headername):
_Package = PosixPackage
_platdep = { # package tag -> platform-dependent components
'HDF5': ['hdf5'],
'HDF5HL': ['hdf5_hl'],
'LZO2': ['lzo2'],
'LZO': ['lzo'],
'BZ2': ['bz2'],
}
elif os.name == 'nt':
_Package = WindowsPackage
_platdep = { # package tag -> platform-dependent components
# TODO check if this is working
'HDF5HL': ['hdf5_hldll', 'hdf5_hldll'],
'HDF5': ['hdf5dll', 'hdf5dll'],
'LZO2': ['lzo2', 'lzo2'],
'LZO': ['liblzo', 'lzo1'],
Expand All @@ -326,6 +329,8 @@ def get_hdf5_version(headername):

hdf5_package = _Package("HDF5", 'HDF5', 'H5public', *_platdep['HDF5'])
hdf5_package.target_function = 'H5close'
hdf5hl_package = _Package("HDF5HL", 'HDF5HL', 'H5LTpublic', *_platdep['HDF5HL'])
hdf5hl_package.target_function = 'H5LTopen_file_image'
lzo2_package = _Package("LZO 2", 'LZO2', _cp('lzo/lzo1x'), *_platdep['LZO2'])
lzo2_package.target_function = 'lzo_version_date'
lzo1_package = _Package("LZO 1", 'LZO', 'lzo1x', *_platdep['LZO'])
Expand Down Expand Up @@ -436,6 +441,7 @@ def get_hdf5_version(headername):
c = new_compiler()
for (package, location) in [
(hdf5_package, HDF5_DIR),
(hdf5hl_package, HDF5_DIR),
(lzo2_package, LZO_DIR),
(lzo1_package, LZO_DIR),
(bzip2_package, BZIP2_DIR),
Expand Down Expand Up @@ -643,7 +649,7 @@ def find_name(base='tables'):
data_files.extend([('Lib/site-packages/%s'%name, dll_files),
])

ADDLIBS = [hdf5_package.library_name, ]
ADDLIBS = [hdf5_package.library_name, hdf5hl_package.library_name]
utilsExtension_libs = LIBS + ADDLIBS
hdf5Extension_libs = LIBS + ADDLIBS
tableExtension_libs = LIBS + ADDLIBS
Expand All @@ -659,7 +665,7 @@ def find_name(base='tables'):
(bzip2_package, _comp_bzip2_libs), ]:

if package.tag in optional_libs:
complibs.extend([hdf5_package.library_name, package.library_name])
complibs.extend([hdf5_package.library_name, hdf5hl_package.library_name, package.library_name])

# List of Blosc file dependencies
blosc_files = ["blosc/blosc.c", "blosc/blosclz.c", "blosc/shuffle.c",
Expand Down Expand Up @@ -689,6 +695,7 @@ def find_name(base='tables'):
"src/H5ARRAY-opt.c",
"src/H5VLARRAY.c",
"src/H5ATTR.c",
"src/H5PCORE-mem.c",
] + blosc_files,
library_dirs=lib_dirs,
libraries=hdf5Extension_libs,
Expand Down
82 changes: 82 additions & 0 deletions src/H5PCORE-mem.c
@@ -0,0 +1,82 @@
#include "hdf5.h"
#include <assert.h>
#include "H5PCORE-mem.h"
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

/*
 * Default image descriptor that the callbacks table points at until a
 * caller installs its own through H5Pset_file_inmemory_callbacks().
 * HDF5 declares hvl_t as { size_t len; void *p; }, so the length comes
 * first in the initializer and the pointer second.
 */
hvl_t udata = {0, NULL};

/*
 * Allocation callback for the file image.
 *
 * Stores the requested size in the hvl_t that `udata` points to and then
 * returns a buffer of that size obtained from malloc().  The
 * `file_image_op` argument is not inspected.
 */
void *image_malloc(size_t size, H5FD_file_image_op_t file_image_op, void *udata) {
    hvl_t *image = (hvl_t *) udata;
    void *buffer;

    (void) file_image_op;

    image->len = size;
    buffer = malloc(size);
    return buffer;
}

/*
 * Copy callback for the file image.
 *
 * Never copies anything: it unconditionally returns NULL, so any copy
 * request routed through this callback fails.  All arguments are ignored.
 */
void *image_memcpy(void *dest, const void *src, size_t size,
                   H5FD_file_image_op_t file_image_op, void *udata) {
    void *failure = NULL;

    (void) dest;
    (void) src;
    (void) size;
    (void) file_image_op;
    (void) udata;

    return failure; /* always fails */
}

/*
 * Resize callback for the file image.
 *
 * Stores the new size in the hvl_t that `udata` points to and then returns
 * the result of realloc() on `ptr`.  The `file_image_op` argument is not
 * inspected.
 */
void *image_realloc(void *ptr, size_t size, H5FD_file_image_op_t file_image_op,
                    void *udata) {
    hvl_t *image = (hvl_t *) udata;
    void *resized;

    (void) file_image_op;

    image->len = size;
    resized = realloc(ptr, size);
    return resized;
}

/*
 * Release callback for the file image.
 *
 * Does not call free(): the buffer pointer is stored in the `p` member of
 * the hvl_t that `udata` points to, and 0 is returned.  The
 * `file_image_op` argument is not inspected.
 */
herr_t image_free(void *ptr, H5FD_file_image_op_t file_image_op, void *udata) {
    hvl_t *image = (hvl_t *) udata;

    (void) file_image_op;

    image->p = ptr; /* keep the buffer reachable through udata */
    return 0;       /* if we get here, we must have been successful */
}

/*
 * User-data copy callback: no copy is made, the very same pointer is
 * handed back.
 */
void *udata_copy(void *udata) {
    void *same = udata;
    return same;
}

/*
 * User-data release callback: nothing is released, 0 is always returned.
 */
herr_t udata_free(void *udata) {
    (void) udata;
    return 0;
}
/*
 * Callback table passed to H5Pset_file_image_callbacks().  The last member
 * is the user-data pointer; it initially refers to the file-level `udata`
 * and is overwritten by H5Pset_file_inmemory_callbacks().
 */
H5FD_file_image_callbacks_t callbacks = {
    image_malloc,
    image_memcpy,
    image_realloc,
    image_free,
    udata_copy,
    udata_free,
    (void *) &udata
};

/*
 * Install the in-memory file image callbacks on a file access property list.
 *
 * fapl  - file access property list to configure.
 * udata - descriptor the callbacks write to: image_malloc/image_realloc
 *         store the buffer size in `len`, image_free stores the buffer
 *         pointer in `p`.  Must not be NULL, because every one of those
 *         callbacks dereferences it.
 *
 * Returns the value of H5Pset_file_image_callbacks(), or -1 when `udata`
 * is NULL.
 */
hid_t H5Pset_file_inmemory_callbacks(hid_t fapl, hvl_t *udata)
{
    /* Reject a NULL descriptor here instead of crashing later inside a
     * callback invoked by the HDF5 library. */
    if (udata == NULL)
        return -1;

    callbacks.udata = udata;
    return H5Pset_file_image_callbacks(fapl, &callbacks);
}

/*
hid_t H5Fcreate_inmemory(hvl_t *udata)
{
hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
H5Pset_fapl_core(fapl,65536,false);
callbacks.udata=udata;
H5Pset_file_image_callbacks(fapl, &callbacks);
hid_t file = H5Fcreate("in_memory", 0, H5P_DEFAULT, fapl);
H5Pclose(fapl);
return file;
}
*/


#if HAVE_HDF5HL_LIB
#include "hdf5_hl.h"
#endif

/*
 * Report whether this file was compiled with HAVE_HDF5HL_LIB set:
 * returns true when it was, false otherwise.
 */
int H5PCOREhasHDF5HL() {
#if HAVE_HDF5HL_LIB
    return true;
#else
    return false;
#endif
}

/*
 * Open a file image held in memory.
 *
 * With HAVE_HDF5HL_LIB set, the call is forwarded unchanged to
 * H5LTopen_file_image(); without it, the arguments are ignored and -1 is
 * returned.
 */
hid_t H5LTopen_file_image_proxy(void *buf_ptr, size_t buf_size, unsigned flags)
{
#if HAVE_HDF5HL_LIB
    return H5LTopen_file_image(buf_ptr, buf_size, flags);
#else
    (void) buf_ptr;
    (void) buf_size;
    (void) flags;
    return -1;
#endif
}
25 changes: 25 additions & 0 deletions src/H5PCORE-mem.h
@@ -0,0 +1,25 @@
/*
* Author: Michal Slonina <michal.slonina@gmail.com>
* Created on July 19, 2012, 11:41 AM
* Work on PyTables in-memory HDF5 file images was kindly sponsored by DeltaMethod.
*/

/* Public interface of H5PCORE-mem.c: helpers for HDF5 file images that
 * live in memory. */
#ifndef H5F_CORE_MEM_H
#define H5F_CORE_MEM_H

#include <H5Tpublic.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Install the in-memory file image callbacks on the file access property
 * list `fapl`; `udata` becomes the user data handed to those callbacks
 * (they record the image size in `len` and the image buffer in `p`). */
hid_t H5Pset_file_inmemory_callbacks(hid_t fapl, hvl_t *udata);
/* Return non-zero when H5PCORE-mem.c was compiled with HAVE_HDF5HL_LIB,
 * zero otherwise. */
int H5PCOREhasHDF5HL();
/* Forward to H5LTopen_file_image() when compiled with HAVE_HDF5HL_LIB;
 * otherwise return -1 without touching the arguments. */
hid_t H5LTopen_file_image_proxy(void *buf_ptr, size_t buf_size, unsigned flags);

#ifdef __cplusplus
}
#endif

#endif /* H5F_CORE_MEM_H */

21 changes: 18 additions & 3 deletions tables/definitions.pxd
Expand Up @@ -24,6 +24,13 @@ cdef extern from "time.h":

from libc.stdio cimport FILE

# Python C API stuff
# String-object functions declared from "Python.h" so that Cython modules
# can cimport them from this definitions file.
cdef extern from "Python.h":
# Build a Python string from `len` bytes starting at `s`.
object PyString_FromStringAndSize(char *s, Py_ssize_t len)
# Type check for Python string objects.
cdef int PyString_Check(object o)
# Length of a Python string object.
Py_ssize_t PyString_Size(object string)
# Pointer to the character buffer of a Python string object.
char* PyString_AsString(object string)


#-----------------------------------------------------------------------------

Expand Down Expand Up @@ -358,9 +365,12 @@ cdef extern from "hdf5.h" nogil:
unsigned int flags, size_t buf_size)
H5D_layout_t H5Pget_layout(hid_t plist)
int H5Pget_chunk(hid_t plist, int max_ndims, hsize_t *dims)
herr_t H5Pset_fapl_core(hid_t fapl_id, size_t increment,
hbool_t backing_store)

herr_t H5Pset_fapl_core(hid_t fapl_id, size_t increment, hbool_t backing_store)
# herr_t H5Pset_fapl_direct(hid_t fapl_id, size_t alignment, size_t block_size, size_t cbuf_size)
# herr_t H5Pset_fapl_log(hid_t fapl_id, const char *logfile, unsigned long long flags, size_t buf_size)
herr_t H5Pset_fapl_sec2(hid_t fapl_id)
herr_t H5Pset_fapl_stdio(hid_t fapl_id)

# Error Handling Interface
#herr_t H5Eget_auto(hid_t estack_id, H5E_auto_t *func, void** data)
herr_t H5Eset_auto(hid_t estack_id, H5E_auto_t func, void *data)
Expand Down Expand Up @@ -402,6 +412,11 @@ cdef extern from "H5ARRAY.h" nogil:
hsize_t *maxdims, H5T_class_t *super_class_id,
char *byteorder)

# Functions for in-memory file images (declared in H5PCORE-mem.h)
cdef extern from "H5PCORE-mem.h" nogil:
# Install the in-memory image callbacks on the access property list `fapl`.
hid_t H5Pset_file_inmemory_callbacks(hid_t fapl, hvl_t *udata)
# Non-zero when the C helper was compiled with HAVE_HDF5HL_LIB.
int H5PCOREhasHDF5HL()
# Open an HDF5 file from a memory buffer; -1 when HDF5HL is unavailable.
hid_t H5LTopen_file_image_proxy(void *buf_ptr, size_t buf_size, unsigned flags)

# Some utilities
cdef extern from "utils.h":
Expand Down
20 changes: 10 additions & 10 deletions tables/file.py
Expand Up @@ -456,22 +456,22 @@ def _delfilters(self):
def __init__(self, filename, mode="r", title="",
rootUEP="/", filters=None, **kwargs):

self.filename = filename
"""The name of the opened file."""
self.mode = mode
"""The mode in which the file was opened."""

# Expand the form '~user'
path = os.path.expanduser(filename)
# Expand the environment variables
path = os.path.expandvars(path)

# Get all the parameters in parameter file(s)
params = dict([(k, v) for k, v in parameters.__dict__.iteritems()
if k.isupper() and not k.startswith('_')])
# Update them with possible keyword arguments
params.update(kwargs)

self.filename = filename
"""The name of the opened file."""
self.mode = mode
"""The mode in which the file was opened."""
if params['DRIVER']!="H5FD_CORE_INMEMORY":
# Expand the form '~user'
path = os.path.expanduser(filename)
# Expand the environment variables
path = os.path.expandvars(path)

# If MAX_*_THREADS is not set yet, set it to the number of cores
# on this machine.

Expand Down
87 changes: 63 additions & 24 deletions tables/hdf5Extension.pyx
Expand Up @@ -55,7 +55,7 @@ from utilsExtension cimport malloc_dims, get_native_type
from libc.stdlib cimport malloc, free
from libc.string cimport strdup
from numpy cimport import_array, ndarray
from cpython cimport PyString_AsString, PyString_FromStringAndSize
from cpython cimport PyString_AsString, PyString_FromStringAndSize, PyString_Check, PyString_Size, PyString_AsString


from definitions cimport (uintptr_t, hid_t, herr_t, hsize_t, hvl_t,
Expand All @@ -72,14 +72,16 @@ from definitions cimport (uintptr_t, hid_t, herr_t, hsize_t, hvl_t,
H5Dget_space, H5Dvlen_reclaim, H5Dget_storage_size, H5Dvlen_get_buf_size,
H5Tclose, H5Tis_variable_str, H5Tget_sign,
H5Adelete,
H5Pcreate, H5Pset_cache, H5Pclose,
H5Pcreate, H5Pset_cache, H5Pclose, H5Pset_fapl_core, H5Pset_fapl_sec2, H5Pset_fapl_stdio, H5Pset_file_inmemory_callbacks,
H5PCOREhasHDF5HL,
H5Sselect_all, H5Sselect_elements, H5Sselect_hyperslab,
H5Screate_simple, H5Sclose,
H5ATTRset_attribute, H5ATTRset_attribute_string,
H5ATTRget_attribute, H5ATTRget_attribute_string,
H5ATTRget_attribute_vlen_string_array,
H5ATTRfind_attribute, H5ATTRget_type_ndims, H5ATTRget_dims,
H5ARRAYget_ndims, H5ARRAYget_info,
H5LTopen_file_image_proxy,
set_cache_size, get_objinfo, get_linkinfo, Giterate, Aiterate, H5UIget_info,
get_len_of_range, conv_float64_timeval32, truncate_dset)

Expand Down Expand Up @@ -253,9 +255,17 @@ cdef class File:
cdef hid_t file_id
cdef hid_t access_plist
cdef object name
cdef hvl_t mem_data # Used only in case of memory write

def __get_supported_drivers(self):
    """Return the values accepted for the ``DRIVER`` parameter.

    The result is a fresh list of driver names followed by ``None``.

    NOTE(review): "H5FD_DIRECT" is listed here, but no branch in the
    visible part of ``_g_new`` configures it -- confirm it is really
    supported.
    """
    named_drivers = ("H5FD_SEC2", "H5FD_STDIO", "H5FD_CORE",
                     "H5FD_CORE_INMEMORY", "H5FD_DIRECT")
    return list(named_drivers) + [None]

def _g_new(self, name, pymode, **params):
# Check if we can handle the driver
driver = params['DRIVER']
if driver!=None and not driver in self.__get_supported_drivers():
raise NotImplementedError("File driver "+str(driver)+" is not implemented. Please choose one of the following drivers: "+str(self.__get_supported_drivers()))

# Create a new file using default properties
self.name = name

Expand All @@ -270,45 +280,70 @@ cdef class File:

# After the following check we can be quite sure
# that the file or directory exists and permissions are right.
checkFileAccess(name, pymode)
# But only if we are using file backed storage.
if driver != 'H5FD_CORE_INMEMORY':
checkFileAccess(name, pymode)

assert pymode in ('r', 'r+', 'a', 'w'), ("an invalid mode string ``%s`` "
"passed the ``checkFileAccess()`` test; "
"please report this to the authors" % pymode)

# Should a new file be created?
exists = os.path.exists(name)
exists = os.path.exists(name) if driver != 'H5FD_CORE_INMEMORY' else PyString_Check(params['H5FD_CORE_INMEMORY_IMAGE'])
self._v_new = not (
pymode in ('r', 'r+') or (pymode == 'a' and exists))

access_plist = H5Pcreate(H5P_FILE_ACCESS)
# The line below uses the CORE driver for doing I/O from memory, not disk
# In general it is a bad idea to do this because HDF5 will have to load
# the contents of the file on disk prior to operate, which takes time and
# resources.
# F. Alted 2010-04-15
#H5Pset_fapl_core(access_plist, 1024, 1)
# Set parameters for chunk cache
H5Pset_cache(access_plist, 0,
params['CHUNK_CACHE_NELMTS'],
params['CHUNK_CACHE_SIZE'],
params['CHUNK_CACHE_PREEMPT'])

if pymode == 'r':
self.file_id = H5Fopen(encname, H5F_ACC_RDONLY, access_plist)
elif pymode == 'r+':
self.file_id = H5Fopen(encname, H5F_ACC_RDWR, access_plist)
elif pymode == 'a':
if exists:
# A test for logging.
## H5Pset_sieve_buf_size(access_plist, 0)
## H5Pset_fapl_log (access_plist, "test.log", H5FD_LOG_LOC_WRITE, 0)

if driver=='H5FD_CORE_INMEMORY' and ( pymode == 'r' or pymode == 'r+' ):
if not H5PCOREhasHDF5HL():
raise RuntimeError("PyTables was compiled without HDF5HL library, H5FD_CORE_INMEMORY driver cannot be used for reading.")
if (not PyString_Check(params['H5FD_CORE_INMEMORY_IMAGE'])):
raise TypeError("H5FD_CORE_INMEMORY driver needs a string passed as H5FD_CORE_INMEMORY_IMAGE argument");
self.mem_data.len = PyString_Size(params['H5FD_CORE_INMEMORY_IMAGE'])
self.mem_data.p = <void *>PyString_AsString(params['H5FD_CORE_INMEMORY_IMAGE'])
self.file_id = H5LTopen_file_image_proxy(self.mem_data.p, self.mem_data.len, 0)
if self.file_id == -1:
raise RuntimeError("Can't open in-memory file for reading.");
else:
access_plist = H5Pcreate(H5P_FILE_ACCESS)
if driver == 'H5FD_STDIO':
H5Pset_fapl_stdio(access_plist)
elif driver == 'H5FD_SEC2':
H5Pset_fapl_sec2(access_plist)
elif driver == 'H5FD_CORE':
H5Pset_fapl_core(access_plist, params['H5FD_CORE_INCREMENT'], params['H5FD_CORE_BACKING_STORE'])
elif driver == 'H5FD_CORE_INMEMORY':
H5Pset_fapl_core(access_plist, params['H5FD_CORE_INCREMENT'], params['H5FD_CORE_BACKING_STORE'])
self.mem_data.len = 0
self.mem_data.p = <void *>0
H5Pset_file_inmemory_callbacks(access_plist, &self.mem_data)

# Set parameters for chunk cache
H5Pset_cache(access_plist, 0,
params['CHUNK_CACHE_NELMTS'],
params['CHUNK_CACHE_SIZE'],
params['CHUNK_CACHE_PREEMPT'])
if pymode == 'r':
self.file_id = H5Fopen(encname, H5F_ACC_RDONLY, access_plist)
elif pymode == 'r+':
self.file_id = H5Fopen(encname, H5F_ACC_RDWR, access_plist)
else:
elif pymode == 'a':
if exists:
# A test for logging.
## H5Pset_sieve_buf_size(access_plist, 0)
## H5Pset_fapl_log (access_plist, "test.log", H5FD_LOG_LOC_WRITE, 0)
self.file_id = H5Fopen(encname, H5F_ACC_RDWR, access_plist)
else:
self.file_id = H5Fcreate(encname, H5F_ACC_TRUNC,
H5P_DEFAULT, access_plist)
elif pymode == 'w':
self.file_id = H5Fcreate(encname, H5F_ACC_TRUNC,
H5P_DEFAULT, access_plist)
elif pymode == 'w':
self.file_id = H5Fcreate(encname, H5F_ACC_TRUNC,
H5P_DEFAULT, access_plist)

if self.file_id < 0:
Expand All @@ -322,6 +357,10 @@ cdef class File:
# Set the maximum number of threads for Blosc
setBloscMaxThreads(params['MAX_BLOSC_THREADS'])

def getInMemoryFileContents(self):
    """Return the in-memory file image as a Python string.

    The string is built from ``self.mem_data``: ``len`` bytes starting at
    ``p``.  In write mode ``len`` is recorded when the image buffer is
    allocated or resized, while ``p`` is only recorded when HDF5 releases
    the buffer.

    Raises ``RuntimeError`` when a size has been recorded but no buffer
    pointer is available yet.  Building the string in that state would
    return ``len`` bytes of uninitialized memory.
    """
    # p == NULL with len == 0 (no image at all) keeps the previous
    # behavior of returning an empty string.
    if self.mem_data.p == NULL and self.mem_data.len != 0:
        raise RuntimeError("No data available in memory. Please make sure that the H5FD_CORE_INMEMORY driver is used and that the file is closed before calling this function.")
    return PyString_FromStringAndSize(<char *>self.mem_data.p, self.mem_data.len)

# Accessor definitions
def _getFileId(self):
Expand Down