diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index cb3db9d9c9..abe3d33905 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -51,6 +51,7 @@ If you know of missing, please email: info@openexr.com. * Paul Schneider * Peter Hillman * Peter Steneteg +* Phil Barrett * Piotr Stanczyk * Ralph Potter * Reto Kromer diff --git a/src/lib/OpenEXRCore/internal_huf.c b/src/lib/OpenEXRCore/internal_huf.c index ceb9319fba..d3dfbfb945 100644 --- a/src/lib/OpenEXRCore/internal_huf.c +++ b/src/lib/OpenEXRCore/internal_huf.c @@ -6,9 +6,12 @@ #include "internal_huf.h" #include "internal_memory.h" +#include "internal_xdr.h" +#include "internal_structs.h" #include #include +#include #include #define HUF_ENCBITS 16 @@ -18,6 +21,11 @@ #define HUF_DECSIZE (1 << HUF_DECBITS) #define HUF_DECMASK (HUF_DECSIZE - 1) +#define SHORT_ZEROCODE_RUN 59 +#define LONG_ZEROCODE_RUN 63 +#define SHORTEST_LONG_RUN (2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN) +#define LONGEST_LONG_RUN (255 + SHORTEST_LONG_RUN) + typedef struct _HufDec { int32_t len; @@ -416,11 +424,6 @@ hufBuildEncTable ( // n zeroes (6 or more) 63 n-6 (6 + 8 bits) // -#define SHORT_ZEROCODE_RUN 59 -#define LONG_ZEROCODE_RUN 63 -#define SHORTEST_LONG_RUN (2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN) -#define LONGEST_LONG_RUN (255 + SHORTEST_LONG_RUN) - static void hufPackEncTable ( const uint64_t* hcode, // i : encoding table [HUF_ENCSIZE] @@ -614,10 +617,7 @@ hufBuildDecTable ( internal_exr_free (p); } - else - { - pl->p = (uint32_t*) internal_exr_alloc (sizeof (uint32_t)); - } + else { pl->p = (uint32_t*) internal_exr_alloc (sizeof (uint32_t)); } if (!pl->p) return EXR_ERR_OUT_OF_MEMORY; @@ -789,14 +789,8 @@ hufEncode ( while (cs-- > 0) \ *out++ = s; \ } \ - else if (out < oe) \ - { \ - *out++ = (uint16_t) po; \ - } \ - else \ - { \ - return EXR_ERR_CORRUPT_CHUNK; \ - } \ + else if (out < oe) { *out++ = (uint16_t) po; } \ + else { return EXR_ERR_CORRUPT_CHUNK; } \ } while (0) // @@ -942,6 +936,727 @@ readUInt (const uint8_t* b) /**************************************/ +// Longest compressed code length that ImfHuf supports (58 bits) +#define MAX_CODE_LEN 58 + +// Number of bits in our acceleration table. Should match all +// codes up to TABLE_LOOKUP_BITS in length. +#define TABLE_LOOKUP_BITS 14 + +#include + +#ifdef __APPLE__ +# include +# define READ64(c) OSSwapInt64 (*(const uint64_t*) (c)); +#elif defined(linux) +# include +# define READ64(c) bswap_64 (*(const uint64_t*) (c)); +#elif defined(_MSC_VER) +# include +# define READ64(c) _byteswap_uint64 (*(const uint64_t*) (c)) +#else +# define READ64(c) \ + ((uint64_t) (c)[0] << 56) | ((uint64_t) (c)[1] << 48) | \ + ((uint64_t) (c)[2] << 40) | ((uint64_t) (c)[3] << 32) | \ + ((uint64_t) (c)[4] << 24) | ((uint64_t) (c)[5] << 16) | \ + ((uint64_t) (c)[6] << 8) | ((uint64_t) (c)[7]) +#endif + +typedef struct FastHufDecoder +{ + int _rleSymbol; // RLE symbol written by the encoder. + // This could be 65536, so beware + // when you use shorts to hold things. + + int _numSymbols; // Number of symbols in the codebook. + + unsigned char _minCodeLength; // Minimum code length, in bits. + unsigned char _maxCodeLength; // Maximum code length, in bits. + + int _idToSymbol[65536 + 1]; // Maps Ids to symbols. Ids are a symbol + // ordering sorted first in terms of + // code length, and by code within + // the same length. Ids run from 0 + // to mNumSymbols-1. + + uint64_t _ljBase[MAX_CODE_LEN + 1 + 1]; // the 'left justified base' table. + // Takes base[i] (i = code length) + // and 'left justifies' it into an uint64_t + // Also includes a sentinel terminator + + uint64_t _ljOffset[MAX_CODE_LEN + 1]; // There are some other terms that can + // be folded into constants when taking + // the 'left justified' decode path. This + // holds those constants, indexed by + // code length + + // + // We can accelerate the 'left justified' processing by running the + // top TABLE_LOOKUP_BITS through a LUT, to find the symbol and code + // length. These are those acceleration tables. + // + // Even though our eventual 'symbols' are ushort's, the encoder adds + // a symbol to indicate RLE. So with a dense code book, we could + // have 2^16+1 codes, hence 'symbol' could be bigger than 16 bits. + // + int _lookupSymbol + [1 << TABLE_LOOKUP_BITS]; /* value = (codeLen << 24) | symbol */ + + uint64_t _tableMin; +} FastHufDecoder; + +static exr_result_t +FastHufDecoder_buildTables ( + const struct _internal_exr_context* pctxt, + FastHufDecoder* fhd, + uint64_t* base, + uint64_t* offset) +{ + // + // Build the 'left justified' base table, by shifting base left.. + // + + for (int i = 0; i <= MAX_CODE_LEN; ++i) + { + if (base[i] != 0xffffffffffffffffULL) + { + fhd->_ljBase[i] = base[i] << (64 - i); + } + else + { + // + // Unused code length - insert dummy values + // + + fhd->_ljBase[i] = 0xffffffffffffffffULL; + } + } + fhd->_ljBase[MAX_CODE_LEN + 1] = 0; /* sentinel for brute force lookup */ + + // + // Combine some terms into a big fat constant, which for + // lack of a better term we'll call the 'left justified' + // offset table (because it serves the same function + // as 'offset', when using the left justified base table. + // + + fhd->_ljOffset[0] = offset[0] - fhd->_ljBase[0]; + for (int i = 1; i <= MAX_CODE_LEN; ++i) + fhd->_ljOffset[i] = offset[i] - (fhd->_ljBase[i] >> (64 - i)); + + // + // Build the acceleration tables for the lookups of + // short codes ( <= TABLE_LOOKUP_BITS long) + // + + for (uint64_t i = 0; i < 1 << TABLE_LOOKUP_BITS; ++i) + { + uint64_t value = i << (64 - TABLE_LOOKUP_BITS); + + fhd->_lookupSymbol[i] = 0xffff; + + for (int codeLen = fhd->_minCodeLength; codeLen <= fhd->_maxCodeLength; + ++codeLen) + { + if (fhd->_ljBase[codeLen] <= value) + { + uint64_t id = + fhd->_ljOffset[codeLen] + (value >> (64 - codeLen)); + if (id < (uint64_t) (fhd->_numSymbols)) + { + fhd->_lookupSymbol[i] = + (fhd->_idToSymbol[id] | (codeLen << 24)); + } + else + { + if (pctxt) + pctxt->print_error ( + pctxt, + EXR_ERR_CORRUPT_CHUNK, + "Huffman decode error (Overrun)"); + return EXR_ERR_CORRUPT_CHUNK; + } + break; + } + } + } + + // + // Store the smallest value in the table that points to real data. + // This should be the entry for the largest length that has + // valid data (in our case, non-dummy _ljBase) + // + + int minIdx = TABLE_LOOKUP_BITS; + + while (minIdx > 0 && fhd->_ljBase[minIdx] == 0xffffffffffffffffULL) + minIdx--; + + if (minIdx < 0) + { + // + // Error, no codes with lengths 0-TABLE_LOOKUP_BITS used. + // Set the min value such that the table is never tested. + // + + fhd->_tableMin = 0xffffffffffffffffULL; + } + else { fhd->_tableMin = fhd->_ljBase[minIdx]; } + return EXR_ERR_SUCCESS; +} + +static inline void +FastHufDecoder_refill ( + uint64_t* buffer, + int numBits, // number of bits to refill + uint64_t* bufferBack, // the next 64-bits, to refill from + int* bufferBackNumBits, // number of bits left in bufferBack + const uint8_t** currByte, // current byte in the bitstream + int* currBitsLeft) +{ + // + // Refill bits into the bottom of buffer, from the top of bufferBack. + // Always top up buffer to be completely full. + // + + *buffer |= (*bufferBack) >> (64 - numBits); + + if (*bufferBackNumBits < numBits) + { + numBits -= *bufferBackNumBits; + + // + // Refill all of bufferBack from the bitstream. Either grab + // a full 64-bit chunk, or whatever bytes are left. If we + // don't have 64-bits left, pad with 0's. + // + + if (*currBitsLeft >= 64) + { + *bufferBack = READ64 (*currByte); + *bufferBackNumBits = 64; + *currByte += sizeof (uint64_t); + *currBitsLeft -= 8 * sizeof (uint64_t); + } + else + { + *bufferBack = 0; + *bufferBackNumBits = 64; + + uint64_t shift = 56; + + while (*currBitsLeft > 0) + { + *bufferBack |= ((uint64_t) (**currByte)) << shift; + + (*currByte)++; + shift -= 8; + *currBitsLeft -= 8; + } + + // + // At this point, currBitsLeft might be negative, just because + // we're subtracting whole bytes. To keep anyone from freaking + // out, zero the counter. + // + + if (*currBitsLeft < 0) *currBitsLeft = 0; + } + + *buffer |= (*bufferBack) >> (64 - numBits); + } + + // + // We can have cases where the previous shift of bufferBack is << 64 - + // this is an undefined operation but tends to create just zeroes. + // so if we won't have any bits left, zero out bufferBack insetad of computing the shift + // + + if (*bufferBackNumBits <= numBits) { *bufferBack = 0; } + else { *bufferBack = (*bufferBack) << numBits; } + *bufferBackNumBits -= numBits; +} + +static inline uint64_t +fasthuf_read_bits ( + int numBits, uint64_t* buffer, int* bufferNumBits, const uint8_t** currByte) +{ + while (*bufferNumBits < numBits) + { + *buffer = ((*buffer) << 8) | *((*currByte)++); + *bufferNumBits += 8; + } + + *bufferNumBits -= numBits; + return ((*buffer) >> (*bufferNumBits)) & ((1 << numBits) - 1); +} + +static exr_result_t +fasthuf_initialize ( + const struct _internal_exr_context* pctxt, + FastHufDecoder* fhd, + const uint8_t** table, + int numBytes, + int minSymbol, + int maxSymbol, + int rleSymbol) +{ + fhd->_rleSymbol = rleSymbol; + fhd->_numSymbols = 0; + fhd->_minCodeLength = 255; + fhd->_maxCodeLength = 0; + + // + // The 'base' table is the minimum code at each code length. base[i] + // is the smallest code (numerically) of length i. + // + + uint64_t base[MAX_CODE_LEN + 1]; + + // + // The 'offset' table is the position (in sorted order) of the first id + // of a given code lenght. Array is indexed by code length, like base. + // + + uint64_t offset[MAX_CODE_LEN + 1]; + + // + // Count of how many codes at each length there are. Array is + // indexed by code length, like base and offset. + // + + size_t codeCount[MAX_CODE_LEN + 1]; + + for (int i = 0; i <= MAX_CODE_LEN; ++i) + { + codeCount[i] = 0; + base[i] = 0xffffffffffffffffULL; + offset[i] = 0; + } + + // + // Count the number of codes, the min/max code lengths, the number of + // codes with each length, and record symbols with non-zero code + // length as we find them. + // + + const uint8_t* currByte = *table; + uint64_t currBits = 0; + int currBitCount = 0; + + const uint8_t* topByte = *table + numBytes; + + for (uint64_t symbol = (uint64_t) minSymbol; symbol <= (uint64_t) maxSymbol; + symbol++) + { + if (currByte >= topByte) + { + if (pctxt) + pctxt->print_error ( + pctxt, + EXR_ERR_CORRUPT_CHUNK, + "Error decoding Huffman table (Truncated table data)."); + return EXR_ERR_CORRUPT_CHUNK; + } + + // + // Next code length - either: + // 0-58 (literal code length) + // 59-62 (various lengths runs of 0) + // 63 (run of n 0's, with n is the next 8 bits) + // + + uint64_t codeLen = + fasthuf_read_bits (6, &currBits, &currBitCount, &currByte); + + if (codeLen < (uint64_t) SHORT_ZEROCODE_RUN) + { + if (codeLen == 0) continue; + + if (codeLen < fhd->_minCodeLength) fhd->_minCodeLength = codeLen; + + if (codeLen > fhd->_maxCodeLength) fhd->_maxCodeLength = codeLen; + + codeCount[codeLen]++; + } + else if (codeLen == (uint64_t) LONG_ZEROCODE_RUN) + symbol += + fasthuf_read_bits (8, &currBits, &currBitCount, &currByte) + + SHORTEST_LONG_RUN - 1; + else + symbol += codeLen - SHORT_ZEROCODE_RUN + 1; + + if (symbol > (uint64_t) maxSymbol) + { + if (pctxt) + pctxt->print_error ( + pctxt, + EXR_ERR_CORRUPT_CHUNK, + "Error decoding Huffman table (Run beyond end of table)."); + return EXR_ERR_CORRUPT_CHUNK; + } + } + + for (int i = 0; i < MAX_CODE_LEN; ++i) + fhd->_numSymbols += codeCount[i]; + + if (fhd->_numSymbols > sizeof (fhd->_idToSymbol) / sizeof (int)) + { + if (pctxt) + pctxt->print_error ( + pctxt, + EXR_ERR_CORRUPT_CHUNK, + "Error decoding Huffman table (Too many symbols)."); + return EXR_ERR_CORRUPT_CHUNK; + } + + // + // Compute base - once we have the code length counts, there + // is a closed form solution for this + // + + { + double* countTmp = (double*) offset; /* temp space */ + + for (int l = fhd->_minCodeLength; l <= fhd->_maxCodeLength; ++l) + { + countTmp[l] = (double) codeCount[l] * + (double) (2ll << (fhd->_maxCodeLength - l)); + } + + for (int l = fhd->_minCodeLength; l <= fhd->_maxCodeLength; ++l) + { + double tmp = 0; + + for (int k = l + 1; k <= fhd->_maxCodeLength; ++k) + tmp += countTmp[k]; + + tmp /= (double) (2ll << (fhd->_maxCodeLength - l)); + + base[l] = (uint64_t) ceil (tmp); + } + } + + // + // Compute offset - these are the positions of the first + // id (not symbol) that has length [i] + // + + offset[fhd->_maxCodeLength] = 0; + + for (int i = fhd->_maxCodeLength - 1; i >= fhd->_minCodeLength; i--) + offset[i] = offset[i + 1] + codeCount[i + 1]; + + // + // Allocate and fill the symbol-to-id mapping. Smaller Ids should be + // mapped to less-frequent symbols (which have longer codes). Use + // the offset table to tell us where the id's for a given code + // length start off. + // + + uint64_t mapping[MAX_CODE_LEN + 1]; + for (int i = 0; i < MAX_CODE_LEN + 1; ++i) + mapping[i] = -1; + for (int i = fhd->_minCodeLength; i <= fhd->_maxCodeLength; ++i) + mapping[i] = offset[i]; + + currByte = *table; + currBits = 0; + currBitCount = 0; + + // + // Although we could have created an uncompressed list of symbols in our + // decoding loop above, it's faster to decode the compressed data again + // + for (uint64_t symbol = (uint64_t) minSymbol; symbol <= (uint64_t) maxSymbol; + symbol++) + { + uint64_t codeLen = + fasthuf_read_bits (6, &currBits, &currBitCount, &currByte); + + if (codeLen < (uint64_t) SHORT_ZEROCODE_RUN) + { + if (codeLen == 0) continue; + + if (mapping[codeLen] >= (uint64_t) fhd->_numSymbols) + { + if (pctxt) + pctxt->print_error ( + pctxt, + EXR_ERR_CORRUPT_CHUNK, + "Huffman decode error (Invalid symbol in header)"); + return EXR_ERR_CORRUPT_CHUNK; + } + fhd->_idToSymbol[mapping[codeLen]] = symbol; + mapping[codeLen]++; + } + else if (codeLen == (uint64_t) LONG_ZEROCODE_RUN) + symbol += + fasthuf_read_bits (8, &currBits, &currBitCount, &currByte) + + SHORTEST_LONG_RUN - 1; + else + symbol += codeLen - SHORT_ZEROCODE_RUN + 1; + } + + *table = currByte; + + return FastHufDecoder_buildTables (pctxt, fhd, base, offset); +} + +static inline int +fasthuf_decode_enabled () +{ +#if defined(__INTEL_COMPILER) || defined(__GNUC__) + + // + // Enabled for ICC, GCC: + // __i386__ -> x86 + // __x86_64__ -> 64-bit x86 + // __e2k__ -> e2k (MCST Elbrus 2000) + +# if defined(__i386__) || defined(__x86_64__) || defined(__e2k__) + return 1; +# else + return 0; +# endif + +#elif defined(_MSC_VER) + + // + // Enabled for Visual Studio: + // _M_IX86 -> x86 + // _M_X64 -> 64bit x86 + +# if defined(_M_IX86) || defined(_M_X64) + return 1; +# else + return 0; +# endif + +#else + + // + // Unknown compiler - Be safe and disable. + // + return 0; +#endif +} + +static exr_result_t +fasthuf_decode ( + const struct _internal_exr_context* pctxt, + FastHufDecoder* fhd, + const uint8_t* src, + int numSrcBits, + uint16_t* dst, + int numDstElems) +{ + // + // Current position (byte/bit) in the src data stream + // (after the first buffer fill) + // + + const unsigned char* currByte = src + 2 * sizeof (uint64_t); + + numSrcBits -= 8 * 2 * sizeof (uint64_t); + + // + // 64-bit buffer holding the current bits in the stream + // + + uint64_t buffer = READ64 (src); + int bufferNumBits = 64; + + // + // 64-bit buffer holding the next bits in the stream + // + + uint64_t bufferBack = READ64 ((src + sizeof (uint64_t))); + int bufferBackNumBits = 64; + + int dstIdx = 0; + + while (dstIdx < numDstElems) + { + int codeLen; + int symbol; + + // + // Test if we can be table accelerated. If so, directly + // lookup the output symbol. Otherwise, we need to fall + // back to searching for the code. + // + // If we're doing table lookups, we don't really need + // a re-filled buffer, so long as we have TABLE_LOOKUP_BITS + // left. But for a search, we do need a refilled table. + // + + if (fhd->_tableMin <= buffer) + { + int tableIdx = + fhd->_lookupSymbol[buffer >> (64 - TABLE_LOOKUP_BITS)]; + + // + // For invalid codes, _tableCodeLen[] should return 0. This + // will cause the decoder to get stuck in the current spot + // until we run out of elements, then barf that the codestream + // is bad. So we don't need to stick a condition like + // if (codeLen > _maxCodeLength) in this inner. + // + + codeLen = tableIdx >> 24; + symbol = tableIdx & 0xffffff; + } + else + { + // + // Brute force search: + // Find the smallest length where _ljBase[length] <= buffer + // + + codeLen = TABLE_LOOKUP_BITS + 1; + + /* sentinel zero can never be greater than buffer */ + while (fhd->_ljBase[codeLen] > + buffer /* && codeLen <= _maxCodeLength */) + codeLen++; + + if (codeLen > fhd->_maxCodeLength) + { + if (pctxt) + pctxt->print_error ( + pctxt, + EXR_ERR_CORRUPT_CHUNK, + "Huffman decode error (Decoded an invalid symbol)"); + return EXR_ERR_CORRUPT_CHUNK; + } + + uint64_t id = fhd->_ljOffset[codeLen] + (buffer >> (64 - codeLen)); + if (id < (uint64_t) fhd->_numSymbols) + { + symbol = fhd->_idToSymbol[id]; + } + else + { + if (pctxt) + pctxt->print_error ( + pctxt, + EXR_ERR_CORRUPT_CHUNK, + "Huffman decode error (Decoded an invalid symbol)"); + return EXR_ERR_CORRUPT_CHUNK; + } + } + + // + // Shift over bit stream, and update the bit count in the buffer + // + + buffer = buffer << codeLen; + bufferNumBits -= codeLen; + + // + // If we recieved a RLE symbol (_rleSymbol), then we need + // to read ahead 8 bits to know how many times to repeat + // the previous symbol. Need to ensure we at least have + // 8 bits of data in the buffer + // + + if (symbol == fhd->_rleSymbol) + { + if (bufferNumBits < 8) + { + FastHufDecoder_refill ( + &buffer, + 64 - bufferNumBits, + &bufferBack, + &bufferBackNumBits, + &currByte, + &numSrcBits); + + bufferNumBits = 64; + } + + int rleCount = buffer >> 56; + + if (dstIdx < 1) + { + if (pctxt) + pctxt->print_error ( + pctxt, + EXR_ERR_CORRUPT_CHUNK, + "Huffman decode error (RLE code with no previous symbol)"); + return EXR_ERR_CORRUPT_CHUNK; + } + + if (dstIdx + rleCount > numDstElems) + { + if (pctxt) + pctxt->print_error ( + pctxt, + EXR_ERR_CORRUPT_CHUNK, + "Huffman decode error (Symbol run beyond expected output buffer length)"); + return EXR_ERR_CORRUPT_CHUNK; + } + + if (rleCount <= 0) + { + if (pctxt) + pctxt->print_error ( + pctxt, + EXR_ERR_CORRUPT_CHUNK, + "Huffman decode error (Invalid RLE length)"); + return EXR_ERR_CORRUPT_CHUNK; + } + + for (int i = 0; i < rleCount; ++i) + dst[dstIdx + i] = dst[dstIdx - 1]; + + dstIdx += rleCount; + + buffer = buffer << 8; + bufferNumBits -= 8; + } + else + { + dst[dstIdx] = symbol; + dstIdx++; + } + + // + // refill bit stream buffer if we're below the number of + // bits needed for a table lookup + // + + if (bufferNumBits < 64) + { + FastHufDecoder_refill ( + &buffer, + 64 - bufferNumBits, + &bufferBack, + &bufferBackNumBits, + &currByte, + &numSrcBits); + + bufferNumBits = 64; + } + } + + if (numSrcBits != 0) + { + if (pctxt) + pctxt->print_error ( + pctxt, + EXR_ERR_CORRUPT_CHUNK, + "Huffman decode error (%d bits of compressed data remains after filling expected output buffer)", + numSrcBits); + return EXR_ERR_CORRUPT_CHUNK; + } + + return EXR_ERR_SUCCESS; +} + +/**************************************/ + uint64_t internal_exr_huf_compress_spare_bytes (void) { @@ -961,6 +1676,7 @@ internal_exr_huf_decompress_spare_bytes (void) ret += HUF_DECSIZE * sizeof (HufDec); // hdec // ret += HUF_ENCSIZE * sizeof (uint64_t*); // fheap // ret += HUF_ENCSIZE * sizeof (uint64_t); // scode + if (sizeof (FastHufDecoder) > ret) ret = sizeof (FastHufDecoder); return ret; } @@ -1028,18 +1744,21 @@ internal_huf_compress ( exr_result_t internal_huf_decompress ( - const uint8_t* compressed, - uint64_t nCompressed, - uint16_t* raw, - uint64_t nRaw, - void* spare, - uint64_t sparebytes) + exr_decode_pipeline_t* decode, + const uint8_t* compressed, + uint64_t nCompressed, + uint16_t* raw, + uint64_t nRaw, + void* spare, + uint64_t sparebytes) { - uint32_t im, iM, nBits; - uint64_t nBytes; - const uint8_t* ptr; - exr_result_t rv; + uint32_t im, iM, nBits; + uint64_t nBytes; + const uint8_t* ptr; + exr_result_t rv; + const struct _internal_exr_context* pctxt = NULL; + if (decode) pctxt = EXR_CCTXT (decode->context); // // need at least 20 bytes for header // @@ -1065,27 +1784,25 @@ internal_huf_decompress ( nBytes = (((uint64_t) (nBits) + 7)) / 8; if (ptr + nBytes > compressed + nCompressed) return EXR_ERR_OUT_OF_MEMORY; - // - // Fast decoder needs at least 2x64-bits of compressed data, and - // needs to be run-able on this platform. Otherwise, fall back - // to the original decoder - // -#if 0 - if (FastHufDecoder::enabled () && nBits > 128) + // + // Fast decoder needs at least 2x64-bits of compressed data, and + // needs to be run-able on this platform. Otherwise, fall back + // to the original decoder + // + if (fasthuf_decode_enabled () && nBits > 128) { - FastHufDecoder fhd (ptr, nCompressed - (ptr - compressed), im, iM, iM); + FastHufDecoder* fhd = (FastHufDecoder*) spare; // must be nBytes remaining in buffer - if (ptr - compressed + nBytes > static_cast (nCompressed)) - { - notEnoughData (); - return; - } + if (ptr - compressed + nBytes > (uint64_t) nCompressed) + return EXR_ERR_OUT_OF_MEMORY; - rv = fhd.decode (ptr, nBits, raw, nRaw); + rv = fasthuf_initialize ( + pctxt, fhd, &ptr, nCompressed - (ptr - compressed), im, iM, iM); + if (rv == EXR_ERR_SUCCESS) + rv = fasthuf_decode (pctxt, fhd, ptr, nBits, raw, nRaw); } else -#endif { uint64_t* freq = (uint64_t*) spare; HufDec* hdec = (HufDec*) (freq + HUF_ENCSIZE); diff --git a/src/lib/OpenEXRCore/internal_huf.h b/src/lib/OpenEXRCore/internal_huf.h index 8eca85a5bb..0e37ee0c87 100644 --- a/src/lib/OpenEXRCore/internal_huf.h +++ b/src/lib/OpenEXRCore/internal_huf.h @@ -7,6 +7,7 @@ #define OPENEXR_CORE_HUF_CODING_H #include "openexr_errors.h" +#include "openexr_decode.h" uint64_t internal_exr_huf_compress_spare_bytes (void); uint64_t internal_exr_huf_decompress_spare_bytes (void); @@ -21,11 +22,12 @@ exr_result_t internal_huf_compress ( uint64_t sparebytes); exr_result_t internal_huf_decompress ( - const uint8_t* compressed, - uint64_t nCompressed, - uint16_t* raw, - uint64_t nRaw, - void* spare, - uint64_t sparebytes); + exr_decode_pipeline_t* decode, + const uint8_t* compressed, + uint64_t nCompressed, + uint16_t* raw, + uint64_t nRaw, + void* spare, + uint64_t sparebytes); #endif /* OPENEXR_CORE_HUF_CODING_H */ diff --git a/src/lib/OpenEXRCore/internal_piz.c b/src/lib/OpenEXRCore/internal_piz.c index 3435d23a7a..fd16498c6a 100644 --- a/src/lib/OpenEXRCore/internal_piz.c +++ b/src/lib/OpenEXRCore/internal_piz.c @@ -464,10 +464,7 @@ internal_exr_apply_piz (exr_encode_pipeline_t* encode) if ((cury % curc->y_samples) != 0) continue; tmp += ((uint64_t) (y / curc->y_samples)) * bpl; } - else - { - tmp += ((uint64_t) y) * bpl; - } + else { tmp += ((uint64_t) y) * bpl; } memcpy (tmp, packed, bpl); priv_to_native16 (tmp, nx * (curc->bytes_per_element / 2)); @@ -625,7 +622,13 @@ internal_exr_undo_piz ( wavbuf = decode->scratch_buffer_1; rv = internal_huf_decompress ( - packed + nBytes, hufbytes, wavbuf, outsz / 2, hufspare, hufSpareBytes); + decode, + packed + nBytes, + hufbytes, + wavbuf, + outsz / 2, + hufspare, + hufSpareBytes); if (rv != EXR_ERR_SUCCESS) return rv; // diff --git a/src/test/OpenEXRCoreTest/compression.cpp b/src/test/OpenEXRCoreTest/compression.cpp index f5a223ef87..013ca8cc51 100644 --- a/src/test/OpenEXRCoreTest/compression.cpp +++ b/src/test/OpenEXRCoreTest/compression.cpp @@ -1,6 +1,14 @@ // SPDX-License-Identifier: BSD-3-Clause // Copyright Contributors to the OpenEXR Project. +// Windows specific addition to prevent the indirect import of the redefined min/max macros +#if defined _WIN32 || defined _WIN64 +# ifdef NOMINMAX +# undef NOMINMAX +# endif +# define NOMINMAX +#endif + #include "write.h" #include "test_value.h" @@ -11,6 +19,7 @@ #include #include +#include #include #include #include @@ -1355,9 +1364,13 @@ testHUF (const std::string& tempdir) { uint64_t esize = internal_exr_huf_compress_spare_bytes (); uint64_t dsize = internal_exr_huf_decompress_spare_bytes (); + // decsize 1 << 16 + 1 + // decsize 1 << 14 EXRCORE_TEST (esize == 65537 * (8 + 8 + sizeof (uint64_t*) + 4)); - EXRCORE_TEST ( - dsize == (65537 * 8 + (1 << 14) * (sizeof (uint32_t*) + 4 + 4))); + const uint64_t hufdecsize = (sizeof (uint32_t*) + sizeof(int32_t) + sizeof(uint32_t)); + // sizeof(FastHufDecoder) is bother to manually compute, just assume it's ok + // if it's returning at least enough for the slow path + EXRCORE_TEST (dsize >= (65537 * sizeof(uint64_t) + 16383 * hufdecsize)); std::vector hspare; @@ -1396,6 +1409,7 @@ testHUF (const std::string& tempdir) pixels decode = p; EXRCORE_TEST_RVAL (internal_huf_decompress ( + NULL, encoded.data (), ebytes, decode.h.data (), @@ -1430,6 +1444,7 @@ testHUF (const std::string& tempdir) } } EXRCORE_TEST_RVAL (internal_huf_decompress ( + NULL, encoded.data (), ebytes, decode.h.data (), @@ -1464,6 +1479,7 @@ testHUF (const std::string& tempdir) } } EXRCORE_TEST_RVAL (internal_huf_decompress ( + NULL, encoded.data (), ebytes, decode.h.data (),