From 5baec4c354df09ba9f7bbd2bd729d2a7e2dec126 Mon Sep 17 00:00:00 2001 From: Connor Manning Date: Thu, 6 Sep 2018 15:25:42 -0500 Subject: [PATCH] Update arbiter bundle for gzip decompression support. --- vendor/arbiter/CMakeLists.txt | 6 +- vendor/arbiter/arbiter.cpp | 24 +++ vendor/arbiter/arbiter.hpp | 335 ++++++++++++++++++++++++++++++++++ 3 files changed, 364 insertions(+), 1 deletion(-) diff --git a/vendor/arbiter/CMakeLists.txt b/vendor/arbiter/CMakeLists.txt index a5e3207c05..d9bf6234fb 100644 --- a/vendor/arbiter/CMakeLists.txt +++ b/vendor/arbiter/CMakeLists.txt @@ -11,6 +11,10 @@ if (CURL_FOUND) add_definitions("-DARBITER_CURL") endif() +if (ZLIB_FOUND) + add_definitions("-DARBITER_ZLIB") +endif() + PDAL_ADD_FREE_LIBRARY(${PDAL_ARBITER_LIB_NAME} STATIC arbiter.cpp) target_include_directories(${PDAL_ARBITER_LIB_NAME} PRIVATE ${PDAL_JSONCPP_INCLUDE_DIR}) @@ -27,7 +31,7 @@ if (UNIX) target_compile_options(${PDAL_ARBITER_LIB_NAME} PRIVATE "-fPIC") else() target_compile_definitions(${PDAL_ARBITER_LIB_NAME} - PUBLIC -DARBITER_DLL_EXPORT -DARBITER_CURL) + PUBLIC -DARBITER_DLL_EXPORT -DARBITER_CURL -DARBITER_ZLIB) endif() set_target_properties(${PDAL_ARBITER_LIB_NAME} PROPERTIES diff --git a/vendor/arbiter/arbiter.cpp b/vendor/arbiter/arbiter.cpp index 01d1f86c3a..6e68af2f07 100644 --- a/vendor/arbiter/arbiter.cpp +++ b/vendor/arbiter/arbiter.cpp @@ -3192,6 +3192,12 @@ std::vector Dropbox::glob(std::string path, bool verbose) const #include #include #include + + +#ifdef ARBITER_ZLIB +#include +#endif + #endif #ifdef ARBITER_CURL @@ -3492,6 +3498,24 @@ Response Curl::get( // Run the command. const int httpCode(perform()); + + for (auto& h : receivedHeaders) + { + std::string& v(h.second); + while (v.size() && v.front() == ' ') v = v.substr(1); + while (v.size() && v.back() == ' ') v.pop_back(); + } + + if (receivedHeaders["Content-Encoding"] == "gzip") + { +#ifdef ARBITER_ZLIB + std::string s(gzip::decompress(data.data(), data.size())); + data.assign(s.begin(), s.end()); +#else + throw ArbiterError("Cannot decompress zlib"); +#endif + } + return Response(httpCode, data, receivedHeaders); #else throw ArbiterError(fail); diff --git a/vendor/arbiter/arbiter.hpp b/vendor/arbiter/arbiter.hpp index 012877297e..ade9876360 100644 --- a/vendor/arbiter/arbiter.hpp +++ b/vendor/arbiter/arbiter.hpp @@ -48,6 +48,341 @@ SOFTWARE. #define ARBITER_CUSTOM_NAMESPACE pdal #define ARBITER_EXTERNAL_JSON +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: arbiter/third/gzip/config.hpp +// ////////////////////////////////////////////////////////////////////// + +#pragma once + +#ifndef ZLIB_CONST +#define ZLIB_CONST +#endif + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: arbiter/third/gzip/config.hpp +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: arbiter/third/gzip/utils.hpp +// ////////////////////////////////////////////////////////////////////// + +#include + +namespace gzip { + +// These live in gzip.hpp because it doesnt need to use deps. +// Otherwise, they would need to live in impl files if these methods used +// zlib structures or functions like inflate/deflate) +inline bool is_compressed(const char* data, std::size_t size) +{ + return size > 2 && + ( + // zlib + ( + static_cast(data[0]) == 0x78 && + (static_cast(data[1]) == 0x9C || + static_cast(data[1]) == 0x01 || + static_cast(data[1]) == 0xDA || + static_cast(data[1]) == 0x5E)) || + // gzip + (static_cast(data[0]) == 0x1F && static_cast(data[1]) == 0x8B)); +} +} // namespace gzip + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: arbiter/third/gzip/utils.hpp +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: arbiter/third/gzip/version.hpp +// ////////////////////////////////////////////////////////////////////// + +#pragma once + +/// The major version number +#define GZIP_VERSION_MAJOR 1 + +/// The minor version number +#define GZIP_VERSION_MINOR 0 + +/// The patch number +#define GZIP_VERSION_PATCH 0 + +/// The complete version number +#define GZIP_VERSION_CODE (GZIP_VERSION_MAJOR * 10000 + GZIP_VERSION_MINOR * 100 + GZIP_VERSION_PATCH) + +/// Version number as string +#define GZIP_VERSION_STRING "1.0.0" + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: arbiter/third/gzip/version.hpp +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: arbiter/third/gzip/compress.hpp +// ////////////////////////////////////////////////////////////////////// + +#ifndef ARBITER_IS_AMALGAMATION +#include "config.hpp" +#endif + +// zlib +#include + +// std +#include +#include +#include + +namespace gzip { + +class Compressor +{ + std::size_t max_; + int level_; + + public: + Compressor(int level = Z_DEFAULT_COMPRESSION, + std::size_t max_bytes = 2000000000) // by default refuse operation if uncompressed data is > 2GB + : max_(max_bytes), + level_(level) + { + } + + template + void compress(InputType& output, + const char* data, + std::size_t size) const + { + +#ifdef DEBUG + // Verify if size input will fit into unsigned int, type used for zlib's avail_in + if (size > std::numeric_limits::max()) + { + throw std::runtime_error("size arg is too large to fit into unsigned int type"); + } +#endif + if (size > max_) + { + throw std::runtime_error("size may use more memory than intended when decompressing"); + } + + z_stream deflate_s; + deflate_s.zalloc = Z_NULL; + deflate_s.zfree = Z_NULL; + deflate_s.opaque = Z_NULL; + deflate_s.avail_in = 0; + deflate_s.next_in = Z_NULL; + + // The windowBits parameter is the base two logarithm of the window size (the size of the history buffer). + // It should be in the range 8..15 for this version of the library. + // Larger values of this parameter result in better compression at the expense of memory usage. + // This range of values also changes the decoding type: + // -8 to -15 for raw deflate + // 8 to 15 for zlib + // (8 to 15) + 16 for gzip + // (8 to 15) + 32 to automatically detect gzip/zlib header (decompression/inflate only) + constexpr int window_bits = 15 + 16; // gzip with windowbits of 15 + + constexpr int mem_level = 8; + // The memory requirements for deflate are (in bytes): + // (1 << (window_bits+2)) + (1 << (mem_level+9)) + // with a default value of 8 for mem_level and our window_bits of 15 + // this is 128Kb + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" + if (deflateInit2(&deflate_s, level_, Z_DEFLATED, window_bits, mem_level, Z_DEFAULT_STRATEGY) != Z_OK) + { + throw std::runtime_error("deflate init failed"); + } +#pragma GCC diagnostic pop + + deflate_s.next_in = reinterpret_cast(data); + deflate_s.avail_in = static_cast(size); + + std::size_t size_compressed = 0; + do + { + size_t increase = size / 2 + 1024; + if (output.size() < (size_compressed + increase)) + { + output.resize(size_compressed + increase); + } + // There is no way we see that "increase" would not fit in an unsigned int, + // hence we use static cast here to avoid -Wshorten-64-to-32 error + deflate_s.avail_out = static_cast(increase); + deflate_s.next_out = reinterpret_cast((&output[0] + size_compressed)); + // From http://www.zlib.net/zlib_how.html + // "deflate() has a return value that can indicate errors, yet we do not check it here. + // Why not? Well, it turns out that deflate() can do no wrong here." + // Basically only possible error is from deflateInit not working properly + deflate(&deflate_s, Z_FINISH); + size_compressed += (increase - deflate_s.avail_out); + } while (deflate_s.avail_out == 0); + + deflateEnd(&deflate_s); + output.resize(size_compressed); + } +}; + +inline std::string compress(const char* data, + std::size_t size, + int level = Z_DEFAULT_COMPRESSION) +{ + Compressor comp(level); + std::string output; + comp.compress(output, data, size); + return output; +} + +} // namespace gzip + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: arbiter/third/gzip/compress.hpp +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: arbiter/third/gzip/decompress.hpp +// ////////////////////////////////////////////////////////////////////// + +#ifndef ARBITER_IS_AMALGAMATION +#include "config.hpp" +#endif + +// zlib +#include + +// std +#include +#include +#include + +namespace gzip { + +class Decompressor +{ + std::size_t max_; + + public: + Decompressor(std::size_t max_bytes = 1000000000) // by default refuse operation if compressed data is > 1GB + : max_(max_bytes) + { + } + + template + void decompress(OutputType& output, + const char* data, + std::size_t size) const + { + z_stream inflate_s; + + inflate_s.zalloc = Z_NULL; + inflate_s.zfree = Z_NULL; + inflate_s.opaque = Z_NULL; + inflate_s.avail_in = 0; + inflate_s.next_in = Z_NULL; + + // The windowBits parameter is the base two logarithm of the window size (the size of the history buffer). + // It should be in the range 8..15 for this version of the library. + // Larger values of this parameter result in better compression at the expense of memory usage. + // This range of values also changes the decoding type: + // -8 to -15 for raw deflate + // 8 to 15 for zlib + // (8 to 15) + 16 for gzip + // (8 to 15) + 32 to automatically detect gzip/zlib header + constexpr int window_bits = 15 + 32; // auto with windowbits of 15 + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" + if (inflateInit2(&inflate_s, window_bits) != Z_OK) + { + throw std::runtime_error("inflate init failed"); + } +#pragma GCC diagnostic pop + inflate_s.next_in = reinterpret_cast(data); + +#ifdef DEBUG + // Verify if size (long type) input will fit into unsigned int, type used for zlib's avail_in + std::uint64_t size_64 = size * 2; + if (size_64 > std::numeric_limits::max()) + { + inflateEnd(&inflate_s); + throw std::runtime_error("size arg is too large to fit into unsigned int type x2"); + } +#endif + if (size > max_ || (size * 2) > max_) + { + inflateEnd(&inflate_s); + throw std::runtime_error("size may use more memory than intended when decompressing"); + } + inflate_s.avail_in = static_cast(size); + std::size_t size_uncompressed = 0; + do + { + std::size_t resize_to = size_uncompressed + 2 * size; + if (resize_to > max_) + { + inflateEnd(&inflate_s); + throw std::runtime_error("size of output string will use more memory then intended when decompressing"); + } + output.resize(resize_to); + inflate_s.avail_out = static_cast(2 * size); + inflate_s.next_out = reinterpret_cast(&output[0] + size_uncompressed); + int ret = inflate(&inflate_s, Z_FINISH); + if (ret != Z_STREAM_END && ret != Z_OK && ret != Z_BUF_ERROR) + { + std::string error_msg = inflate_s.msg; + inflateEnd(&inflate_s); + throw std::runtime_error(error_msg); + } + + size_uncompressed += (2 * size - inflate_s.avail_out); + } while (inflate_s.avail_out == 0); + inflateEnd(&inflate_s); + output.resize(size_uncompressed); + } +}; + +inline std::string decompress(const char* data, std::size_t size) +{ + Decompressor decomp; + std::string output; + decomp.decompress(output, data, size); + return output; +} + +} // namespace gzip + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: arbiter/third/gzip/decompress.hpp +// ////////////////////////////////////////////////////////////////////// + + + + + + // ////////////////////////////////////////////////////////////////////// // Beginning of content of file: arbiter/util/exports.hpp // //////////////////////////////////////////////////////////////////////