diff --git a/include/zmatlib.h b/include/zmatlib.h
index e62fa58..7a76bb3 100644
--- a/include/zmatlib.h
+++ b/include/zmatlib.h
@@ -63,9 +63,15 @@ extern "C"
  * 4: lzma
  * 5: lz4
  * 6: lz4hc
+ * 7: blosc2blosclz
+ * 8: blosc2lz4
+ * 9: blosc2lz4hc
+ * 10: blosc2zlib
+ * 11: blosc2zstd
+ * -1: unknown
  */
 
-enum TZipMethod {zmZlib, zmGzip, zmBase64, zmLzip, zmLzma, zmLz4, zmLz4hc};
+enum TZipMethod {zmZlib, zmGzip, zmBase64, zmLzip, zmLzma, zmLz4, zmLz4hc, zmBlosc2Blosclz, zmBlosc2Lz4, zmBlosc2Lz4hc, zmBlosc2Zlib, zmBlosc2Zstd, zmUnknown=-1};
 
 /**
  * @brief Main interface to perform compression/decompression
diff --git a/private/zipmat.mexa64 b/private/zipmat.mexa64
deleted file mode 100755
index acb2498..0000000
Binary files a/private/zipmat.mexa64 and /dev/null differ
diff --git a/private/zipmat.mexmaci64 b/private/zipmat.mexmaci64
deleted file mode 100755
index e520e5f..0000000
Binary files a/private/zipmat.mexmaci64 and /dev/null differ
diff --git a/private/zipmat.mexw64 b/private/zipmat.mexw64
deleted file mode 100755
index 1955845..0000000
Binary files a/private/zipmat.mexw64 and /dev/null differ
diff --git a/src/Makefile b/src/Makefile
index 5921bd0..43dd628 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -11,6 +11,9 @@ ZMATDIR ?=$(ROOTDIR)
 LIBDIR ?=$(ROOTDIR)/lib
 
 MKDIR :=mkdir
+HAVE_LZMA ?=yes
+HAVE_LZ4 ?=yes
+HAVE_BLOSC2?=yes
 
 MEX=mex
 AR=$(CC)
@@ -23,7 +26,7 @@ DOXY := doxygen
 DOCDIR := $(ZMATDIR)/doc
 DOXYCFG=zmat.cfg
 
-INCLUDEDIRS=-I../include -Ieasylzma -Ieasylzma/pavlov -Ilz4
+INCLUDEDIRS=-I../include
 
 CUOMPLINK=
 
@@ -39,11 +42,7 @@ OUTPUTFLAG:=-o
 OBJSUFFIX=.o
 EXESUFFIX=.mex*
 
-FILES=zmatlib lz4/lz4 lz4/lz4hc easylzma/compress easylzma/decompress \
-easylzma/lzma_header easylzma/lzip_header easylzma/common_internal \
-easylzma/pavlov/LzmaEnc easylzma/pavlov/LzmaDec easylzma/pavlov/LzmaLib \
-easylzma/pavlov/LzFind easylzma/pavlov/Bra easylzma/pavlov/BraIA64 \
-easylzma/pavlov/Alloc easylzma/pavlov/7zCrc
+FILES=zmatlib
 
 ifeq ($(findstring CYGWIN,$(PLATFORM)), CYGWIN)
     ifeq ($(findstring x86_64,$(ARCH)), x86_64)
@@ -76,6 +75,38 @@ else
     endif
 endif
 
+ifeq ($(HAVE_LZMA),no)
+    CFLAGS+=-DNO_LZMA
+else
+    INCLUDEDIRS+=-Ieasylzma -Ieasylzma/pavlov
+    FILES+=easylzma/compress easylzma/decompress \
+    easylzma/lzma_header easylzma/lzip_header easylzma/common_internal \
+    easylzma/pavlov/LzmaEnc easylzma/pavlov/LzmaDec easylzma/pavlov/LzmaLib \
+    easylzma/pavlov/LzFind easylzma/pavlov/Bra easylzma/pavlov/BraIA64 \
+    easylzma/pavlov/Alloc easylzma/pavlov/7zCrc
+endif
+
+ifeq ($(HAVE_BLOSC2),no)
+    CFLAGS+=-DNO_BLOSC2
+else
+    ifeq ($(HAVE_LZ4),no)
+        INCLUDEDIRS+=-Ilz4
+        FILES+= lz4/lz4 lz4/lz4hc
+    endif
+    LINKOPT+=-Lblosc2/lib -lblosc2 -Lblosc2/internal-complibs/zstd-1.5.2 -lzstd
+    INCLUDEDIRS+=-Iblosc2/include
+endif
+
+ifeq ($(HAVE_LZ4),no)
+    CFLAGS+=-DNO_LZ4
+else
+    INCLUDEDIRS+=-Ilz4
+    FILES+= lz4/lz4 lz4/lz4hc
+endif
+
+
+
+
 ifeq ($(MAKECMDGOALS),lib)
     AR :=ar
     ARFLAGS :=cr
@@ -135,18 +166,24 @@ OBJS := $(addsuffix $(OBJSUFFIX), $(FILES))
 
 all dll lib mex oct: $(OUTPUT_DIR)/$(BINARY)
 
+blosc:
+	@if [ "$(HAVE_BLOSC2)" != "no" ]; then\
+		$(MAKE) -C blosc2 all;\
+	fi
+
 makedirs:
 	@if test ! -d $(OUTPUT_DIR); then $(MKDIR) $(OUTPUT_DIR); fi
 
 makedocdir:
 	@if test ! -d $(DOCDIR); then $(MKDIR) $(DOCDIR); fi
 
-$(OUTPUT_DIR)/$(BINARY): makedirs $(OBJS)
+$(OUTPUT_DIR)/$(BINARY): makedirs $(OBJS) blosc
 $(OUTPUT_DIR)/$(BINARY): $(OBJS)
 	@$(ECHO) Building $@
 	$(AR) $(ARFLAGS) $(OUTPUTFLAG) $@ $(OBJS) $(LINKOPT) $(USERLINKOPT)
 
 %$(OBJSUFFIX): %.cpp
+	@$(ECHO) Building $@
 	$(CXX) $(INCLUDEDIRS) $(CPPOPT) -c -o $@ $<
 
 %$(OBJSUFFIX): %.c
@@ -159,6 +196,7 @@ $(OUTPUT_DIR)/$(BINARY): $(OBJS)
 
 clean:
 	-rm -f $(OBJS) $(OUTPUT_DIR)/$(BINARY)$(EXESUFFIX) zmat$(OBJSUFFIX) $(LIBDIR)/*
+	-$(MAKE) -C blosc2 clean
 
 pretty:
 	astyle \
diff --git a/src/zmat.cpp b/src/zmat.cpp
index 5bd478d..2051919 100644
--- a/src/zmat.cpp
+++ b/src/zmat.cpp
@@ -66,15 +66,53 @@ const char* metadata[] = {"type", "size", "byte", "method", "status", "level"};
 void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
     TZipMethod zipid = zmZlib;
     int iscompress = 1;
-#if defined(NO_LZ4) && defined(NO_LZMA)
-    const char* zipmethods[] = {"zlib", "gzip", "base64", ""};
-#elif !defined(NO_LZMA) && defined(NO_LZ4)
-    const char* zipmethods[] = {"zlib", "gzip", "base64", "lzip", "lzma", ""};
-#elif defined(NO_LZMA) && !defined(NO_LZ4)
-    const char* zipmethods[] = {"zlib", "gzip", "base64", "lz4", "lz4hc", ""};
-#else
-    const char* zipmethods[] = {"zlib", "gzip", "base64", "lzip", "lzma", "lz4", "lz4hc", ""};
+    const char* zipmethods[] = {
+        "zlib",
+        "gzip",
+        "base64",
+#if !defined(NO_LZMA)
+        "lzip",
+        "lzma",
 #endif
+#if !defined(NO_LZ4)
+        "lz4",
+        "lz4hc",
+#endif
+#if !defined(NO_BLOSC2)
+        "blosc2blosclz",
+        "blosc2lz4",
+        "blosc2lz4hc",
+        "blosc2zlib",
+        "blosc2zstd",
+#endif
+        ""};
+
+    const TZipMethod zipmethodid[] = {
+        zmZlib,
+        zmGzip,
+        zmBase64,
+#if !defined(NO_LZMA)
+        zmLzip,
+        zmLzma,
+#endif
+#if !defined(NO_LZ4)
+        zmLz4,
+        zmLz4hc,
+#endif
+#if !defined(NO_BLOSC2)
+        zmBlosc2Blosclz,
+        zmBlosc2Lz4,
+        zmBlosc2Lz4hc,
+        zmBlosc2Zlib,
+        zmBlosc2Zstd,
+#endif
+        zmUnknown};
+
+    int nthread = 1; /*nthread, shuffle and typesize are only used by blosc2 compressors*/
+    int shuffle = 1;
+    int typesize = 4;
+    int clevel = 1; /*0 requests decoding; a negative value is the negated compression level*/
+
     int use4bytedim = 0;
 
     /**
@@ -103,7 +141,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 
     if (nrhs >= 2) {
         double* val = mxGetPr(prhs[1]);
-        iscompress = val[0];
+        clevel = val[0];
     }
 
     if (nrhs >= 3) {
@@ -116,7 +154,31 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
         if ((zipid = (TZipMethod)zmat_keylookup((char*)mxArrayToString(prhs[2]), zipmethods)) < 0) {
             mexErrMsgTxt("the specified compression method is not supported");
         }
+
+        zipid = zipmethodid[(int)zipid];
+    }
+
+    if (nrhs >= 4) {
+        double* val = mxGetPr(prhs[3]);
+        nthread = val[0];
+    }
+
+    if (nrhs >= 5) {
+        double* val = mxGetPr(prhs[4]);
+        shuffle = val[0];
+    }
+
+    if (nrhs >= 6) {
+        double* val = mxGetPr(prhs[5]);
+        typesize = val[0];
+    }
+
+    /* pack level, nthread, shuffle and typesize into iscompress, one byte each from low to high; the level is masked so a negative value cannot overwrite the upper bytes */
+    if (clevel) {
+        iscompress = ((clevel & 0xFF) | ((nthread & 0xFF) << 8) | ((shuffle & 0xFF) << 16) | (int)((unsigned int)(typesize & 0xFF) << 24));
+    } else {
+        iscompress = ((clevel & 0xFF) | ((nthread & 0xFF) << 8));
     }
 
     try {
         if (mxIsChar(prhs[0]) || (mxIsNumeric(prhs[0]) && !mxIsComplex(prhs[0])) || mxIsLogical(prhs[0])) {
diff --git a/src/zmatlib.c b/src/zmatlib.c
index 19425fd..94116cb 100644
--- a/src/zmatlib.c
+++ b/src/zmatlib.c
@@ -43,6 +43,7 @@
 *******************************************************************************/
 
 #include
+#include
 #include
 #include
 #include
@@ -60,6 +61,10 @@
     #include "lz4/lz4hc.h"
 #endif
 
+#ifndef NO_BLOSC2
+    #include "blosc2.h"
+#endif
+
 #ifndef NO_LZMA
 /**
  * @brief Easylzma interface to perform compression
@@ -112,7 +117,9 @@ const char* zmat_errcode[] = {
     "easylzma error, see info.status for error flag, often a result of mismatch in compression method",/*-4*/
     "can not allocate output buffer",/*-5*/
     "lz4 error, see info.status for error flag, often a result of mismatch in compression method",/*-6*/
-    "unsupported method" /*-7*/
+    "unsupported blosc2 codec",/*-7*/
+    "blosc2 error, see info.status for error flag, often a result of mismatch in compression method",/*-8*/
+    "unsupported method" /*-999*/
 };
 
 /**
@@ -145,6 +152,8 @@ char* zmat_error(int id) {
 int zmat_run(const size_t inputsize, unsigned char* inputstr, size_t* outputsize, unsigned char** outputbuf, const int zipid, int* ret, const int iscompress) {
     z_stream zs;
     size_t buflen[2] = {0};
+    unsigned int nthread = 1, shuffle = 1, typesize = 4;
+    signed char compressflag = (signed char)(iscompress & 0xFF); /* explicitly signed: a negative level must stay negative where plain char is unsigned */
 
     *outputbuf = NULL;
     zs.zalloc = Z_NULL;
@@ -154,8 +163,23 @@ int zmat_run(const size_t inputsize, unsigned char* inputstr, size_t* outputsize
     if (inputsize == 0) {
         return -1;
     }
+
+    /* bytes 1-3 of iscompress carry nthread, shuffle and typesize for blosc2; a zero byte keeps the default */
+    if ((iscompress & 0xFF00) >> 8) {
+        nthread = (iscompress & 0xFF00) >> 8;
+    }
+
+    if ((iscompress & 0xFF0000) >> 16) {
+        shuffle = (iscompress & 0xFF0000) >> 16;
+    }
 
-    if (iscompress) {
+    if ((iscompress & 0xFF000000) >> 24) {
+        typesize = (iscompress & 0xFF000000) >> 24;
+    }
+
+    /* only the low byte selects the mode: zero decodes, non-zero encodes */
+    /* (a negative low byte is the negated compression level) */
+    if (compressflag) {
         /**
          * perform compression or encoding
          */
@@ -169,11 +193,11 @@ int zmat_run(const size_t inputsize, unsigned char* inputstr, size_t* outputsize
              * zlib (.zip) or gzip (.gz) compression
              */
             if (zipid == zmZlib) {
-                if (deflateInit(&zs, (iscompress > 0) ? Z_DEFAULT_COMPRESSION : (-iscompress)) != Z_OK) {
+                if (deflateInit(&zs, (compressflag > 0) ? Z_DEFAULT_COMPRESSION : (-compressflag)) != Z_OK) {
                     return -2;
                 }
             } else {
-                if (deflateInit2(&zs, (iscompress > 0) ? Z_DEFAULT_COMPRESSION : (-iscompress), Z_DEFLATED, 15 | 16, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK) {
+                if (deflateInit2(&zs, (compressflag > 0) ? Z_DEFAULT_COMPRESSION : (-compressflag), Z_DEFLATED, 15 | 16, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK) {
                     return -2;
                 }
             }
@@ -201,7 +225,7 @@ int zmat_run(const size_t inputsize, unsigned char* inputstr, size_t* outputsize
              * lzma (.lzma) or lzip (.lzip) compression
              */
             *ret = simpleCompress((elzma_file_format)(zipid - 3), (unsigned char*)inputstr,
-                                  inputsize, outputbuf, outputsize, iscompress);
+                                  inputsize, outputbuf, outputsize, compressflag);
 
             if (*ret != ELZMA_E_OK) {
                 return -4;
@@ -222,7 +246,7 @@ int zmat_run(const size_t inputsize, unsigned char* inputstr, size_t* outputsize
             if (zipid == zmLz4) {
                 *outputsize = LZ4_compress_default((const char*)inputstr, (char*)(*outputbuf), inputsize, *outputsize);
             } else {
-                *outputsize = LZ4_compress_HC((const char*)inputstr, (char*)(*outputbuf), inputsize, *outputsize, (iscompress > 0) ? 8 : (-iscompress));
+                *outputsize = LZ4_compress_HC((const char*)inputstr, (char*)(*outputbuf), inputsize, *outputsize, (compressflag > 0) ? 8 : (-compressflag));
             }
 
             *ret = *outputsize;
@@ -231,9 +255,39 @@ int zmat_run(const size_t inputsize, unsigned char* inputstr, size_t* outputsize
                 return -6;
             }
 
+#endif
+#ifndef NO_BLOSC2
+        } else if (zipid >= zmBlosc2Blosclz && zipid <= zmBlosc2Zstd) {
+            /**
+             * blosc2 meta-compressor (support various filters and compression codecs)
+             */
+            const char *codecs[] ={"blosclz", "lz4", "lz4hc", "zlib", "zstd"};
+            if (blosc1_set_compressor(codecs[zipid - zmBlosc2Blosclz]) == -1) {
+                return -7;
+            }
+
+            blosc2_set_nthreads(nthread);
+
+            *outputsize = inputsize + BLOSC2_MAX_OVERHEAD; /* blosc2 guarantees the compression will always succeed at this size */
+
+            if (!(*outputbuf = (unsigned char*)malloc(*outputsize))) {
+                return -5;
+            }
+            *ret = blosc1_compress((compressflag > 0) ? 5 : (-compressflag), shuffle, typesize, inputsize, (const void*)inputstr, (void*)(*outputbuf), *outputsize);
+
+            if (*ret < 0) {
+                return -8;
+            }
+
+            *outputsize = *ret;
+
+            if (!(*outputbuf = (unsigned char*)realloc(*outputbuf, *outputsize))) {
+                return -5;
+            }
+
 #endif
         } else {
-            return -7;
+            return -999;
         }
     } else {
         /**
@@ -267,7 +321,7 @@ int zmat_run(const size_t inputsize, unsigned char* inputstr, size_t* outputsize
             zs.next_in = inputstr; /* input char array*/
             zs.avail_out = buflen[0]; /* size of output*/
 
-            zs.next_out = (Bytef*)(*outputbuf); /*(Bytef *)(); // output char array*/
+            zs.next_out = (Bytef*)(*outputbuf); /* output char array*/
 
             while ((*ret = inflate(&zs, Z_SYNC_FLUSH)) != Z_STREAM_END && *ret != Z_DATA_ERROR && count <= 10) {
                 *outputbuf = (unsigned char*)realloc(*outputbuf, (buflen[0] << count));
@@ -327,9 +381,41 @@ int zmat_run(const size_t inputsize, unsigned char* inputstr, size_t* outputsize
                 return -6;
             }
 
+#endif
+#ifndef NO_BLOSC2
+        } else if (zipid >= zmBlosc2Blosclz && zipid <= zmBlosc2Zstd) {
+            /**
+             * blosc2 meta-compressor (support various filters and compression codecs)
+             */
+            int count = 2;
+            blosc2_set_nthreads(nthread);
+            *outputsize = (inputsize << count);
+
+            if (!(*outputbuf = (unsigned char*)malloc(*outputsize))) {
+                *ret = -5;
+                return *ret;
+            }
+
+            while ((*ret = blosc1_decompress((const char*)inputstr, (char*)(*outputbuf), *outputsize)) <= 0 && count <= 10) {
+                *outputsize = (inputsize << count);
+
+                if (!(*outputbuf = (unsigned char*)realloc(*outputbuf, *outputsize))) {
+                    *ret = -5;
+                    return *ret;
+                }
+
+                count++;
+            }
+
+            if (*ret < 0) {
+                return -8;
+            }
+
+            *outputsize = *ret;
+
 #endif
         } else {
-            return -7;
+            return -999;
         }
     }
 
diff --git a/zmat.m b/zmat.m
index 1add316..eb43301 100644
--- a/zmat.m
+++ b/zmat.m
@@ -77,6 +77,11 @@
     error('input must be a char, non-complex numeric or logical vector or N-D array');
 end
 
+typesize=1; % bytes per element passed to zipmat; char, logical and empty inputs count as 1 byte
+if (isnumeric(input) && ~isempty(input))
+    typesize=length(typecast(input(1), 'uint8'));
+end
+
 if (ischar(input))
     input = uint8(input);
 end
@@ -94,13 +99,22 @@
     zipmethod = varargin{3};
 end
 
+opt=struct;
+if (nargin > 4 && ischar(varargin{4}) && bitand(length(varargin), 1)==1)
+    opt=cell2struct(varargin(5:2:end), varargin(4:2:end), 2);
+end
+
+nthread=getoption('nthread', 1, opt);
+shuffle=getoption('shuffle', 1, opt);
+typesize=getoption('typesize', typesize, opt);
+
 iscompress = round(iscompress);
 
 if ((strcmp(zipmethod, 'zlib') || strcmp(zipmethod, 'gzip')) && iscompress <= -10)
     iscompress = -9;
 end
 
-[varargout{1:max(1, nargout)}] = zipmat(input, iscompress, zipmethod);
+[varargout{1:max(1, nargout)}] = zipmat(input, iscompress, zipmethod, nthread, shuffle, typesize);
 
 if (strcmp(zipmethod, 'base64') && iscompress > 1)
     varargout{1} = char(varargout{1});
@@ -119,3 +133,9 @@
     end
     varargout{1} = reshape(varargout{1}, inputinfo.size);
 end
+
+function value=getoption(key, default, opt)
+value=default;
+if(isfield(opt, key))
+    value=opt.(key);
+end
\ No newline at end of file