Skip to content

Commit

Permalink
Update arbiter bundle for gzip decompression support.
Browse files Browse the repository at this point in the history
  • Loading branch information
connormanning authored and kylemann16 committed Sep 11, 2018
1 parent d978d77 commit 5baec4c
Show file tree
Hide file tree
Showing 3 changed files with 364 additions and 1 deletion.
6 changes: 5 additions & 1 deletion vendor/arbiter/CMakeLists.txt
Expand Up @@ -11,6 +11,10 @@ if (CURL_FOUND)
add_definitions("-DARBITER_CURL")
endif()

if (ZLIB_FOUND)
add_definitions("-DARBITER_ZLIB")
endif()

PDAL_ADD_FREE_LIBRARY(${PDAL_ARBITER_LIB_NAME} STATIC arbiter.cpp)
target_include_directories(${PDAL_ARBITER_LIB_NAME} PRIVATE
${PDAL_JSONCPP_INCLUDE_DIR})
Expand All @@ -27,7 +31,7 @@ if (UNIX)
target_compile_options(${PDAL_ARBITER_LIB_NAME} PRIVATE "-fPIC")
else()
target_compile_definitions(${PDAL_ARBITER_LIB_NAME}
PUBLIC -DARBITER_DLL_EXPORT -DARBITER_CURL)
PUBLIC -DARBITER_DLL_EXPORT -DARBITER_CURL -DARBITER_ZLIB)
endif()

set_target_properties(${PDAL_ARBITER_LIB_NAME} PROPERTIES
Expand Down
24 changes: 24 additions & 0 deletions vendor/arbiter/arbiter.cpp
Expand Up @@ -3192,6 +3192,12 @@ std::vector<std::string> Dropbox::glob(std::string path, bool verbose) const
#include <arbiter/util/curl.hpp>
#include <arbiter/util/http.hpp>
#include <arbiter/util/util.hpp>


#ifdef ARBITER_ZLIB
#include <arbiter/third/gzip/decompress.hpp>
#endif

#endif

#ifdef ARBITER_CURL
Expand Down Expand Up @@ -3492,6 +3498,24 @@ Response Curl::get(

// Run the command.
const int httpCode(perform());

for (auto& h : receivedHeaders)
{
std::string& v(h.second);
while (v.size() && v.front() == ' ') v = v.substr(1);
while (v.size() && v.back() == ' ') v.pop_back();
}

if (receivedHeaders["Content-Encoding"] == "gzip")
{
#ifdef ARBITER_ZLIB
std::string s(gzip::decompress(data.data(), data.size()));
data.assign(s.begin(), s.end());
#else
throw ArbiterError("Cannot decompress zlib");
#endif
}

return Response(httpCode, data, receivedHeaders);
#else
throw ArbiterError(fail);
Expand Down
335 changes: 335 additions & 0 deletions vendor/arbiter/arbiter.hpp
Expand Up @@ -48,6 +48,341 @@ SOFTWARE.
#define ARBITER_CUSTOM_NAMESPACE pdal
#define ARBITER_EXTERNAL_JSON

// //////////////////////////////////////////////////////////////////////
// Beginning of content of file: arbiter/third/gzip/config.hpp
// //////////////////////////////////////////////////////////////////////

#pragma once

// Define ZLIB_CONST (unless the build already did) before <zlib.h> is
// included. Per zlib's zconf.h this makes the z_const qualifier expand to
// `const`; the compress/decompress code in this bundle casts its
// `const char*` input to `z_const Bytef*` when filling in z_stream::next_in.
#ifndef ZLIB_CONST
#define ZLIB_CONST
#endif

// //////////////////////////////////////////////////////////////////////
// End of content of file: arbiter/third/gzip/config.hpp
// //////////////////////////////////////////////////////////////////////






// //////////////////////////////////////////////////////////////////////
// Beginning of content of file: arbiter/third/gzip/utils.hpp
// //////////////////////////////////////////////////////////////////////

#include <cstdlib>

namespace gzip {

// These live in gzip.hpp because they don't need any dependencies.
// If these functions used zlib structures or functions (like
// inflate/deflate), they would need to live in the impl files instead.
// Cheap signature check: reports whether a buffer *looks* like zlib- or
// gzip-compressed data by inspecting only its first two bytes. Nothing is
// decompressed or otherwise validated.
//
// data: start of the buffer. Only data[0] and data[1] are read, and only
//       when size > 2.
// size: number of bytes available at data.
//
// Returns true when size > 2 and the buffer starts with either
//   - 0x78 followed by 0x01, 0x5E, 0x9C or 0xDA (zlib header), or
//   - 0x1F 0x8B (gzip magic number).
// Buffers of two bytes or fewer are always reported as not compressed.
inline bool is_compressed(const char* data, std::size_t size)
{
    if (size <= 2)
    {
        return false;
    }

    // Compare as unsigned char: plain char may be signed, in which case the
    // bytes >= 0x80 below would never compare equal. unsigned char is used
    // rather than uint8_t because this header does not include <cstdint>.
    const unsigned char first = static_cast<unsigned char>(data[0]);
    const unsigned char second = static_cast<unsigned char>(data[1]);

    // zlib
    if (first == 0x78)
    {
        return second == 0x9C ||
               second == 0x01 ||
               second == 0xDA ||
               second == 0x5E;
    }

    // gzip
    return first == 0x1F && second == 0x8B;
}
} // namespace gzip

// //////////////////////////////////////////////////////////////////////
// End of content of file: arbiter/third/gzip/utils.hpp
// //////////////////////////////////////////////////////////////////////






// //////////////////////////////////////////////////////////////////////
// Beginning of content of file: arbiter/third/gzip/version.hpp
// //////////////////////////////////////////////////////////////////////

#pragma once

/// The major version number
#define GZIP_VERSION_MAJOR 1

/// The minor version number
#define GZIP_VERSION_MINOR 0

/// The patch number
#define GZIP_VERSION_PATCH 0

/// The complete version number, packed into a single integer as
/// MAJOR * 10000 + MINOR * 100 + PATCH (so 1.0.0 evaluates to 10000).
#define GZIP_VERSION_CODE (GZIP_VERSION_MAJOR * 10000 + GZIP_VERSION_MINOR * 100 + GZIP_VERSION_PATCH)

/// Version number as string. This literal is written out by hand rather than
/// derived from the three macros above, so it has to be updated together with
/// them.
#define GZIP_VERSION_STRING "1.0.0"

// //////////////////////////////////////////////////////////////////////
// End of content of file: arbiter/third/gzip/version.hpp
// //////////////////////////////////////////////////////////////////////






// //////////////////////////////////////////////////////////////////////
// Beginning of content of file: arbiter/third/gzip/compress.hpp
// //////////////////////////////////////////////////////////////////////

#ifndef ARBITER_IS_AMALGAMATION
#include "config.hpp"
#endif

// zlib
#include <zlib.h>

// std
#include <limits>
#include <stdexcept>
#include <string>

namespace gzip {

class Compressor
{
std::size_t max_;
int level_;

public:
Compressor(int level = Z_DEFAULT_COMPRESSION,
std::size_t max_bytes = 2000000000) // by default refuse operation if uncompressed data is > 2GB
: max_(max_bytes),
level_(level)
{
}

template <typename InputType>
void compress(InputType& output,
const char* data,
std::size_t size) const
{

#ifdef DEBUG
// Verify if size input will fit into unsigned int, type used for zlib's avail_in
if (size > std::numeric_limits<unsigned int>::max())
{
throw std::runtime_error("size arg is too large to fit into unsigned int type");
}
#endif
if (size > max_)
{
throw std::runtime_error("size may use more memory than intended when decompressing");
}

z_stream deflate_s;
deflate_s.zalloc = Z_NULL;
deflate_s.zfree = Z_NULL;
deflate_s.opaque = Z_NULL;
deflate_s.avail_in = 0;
deflate_s.next_in = Z_NULL;

// The windowBits parameter is the base two logarithm of the window size (the size of the history buffer).
// It should be in the range 8..15 for this version of the library.
// Larger values of this parameter result in better compression at the expense of memory usage.
// This range of values also changes the decoding type:
// -8 to -15 for raw deflate
// 8 to 15 for zlib
// (8 to 15) + 16 for gzip
// (8 to 15) + 32 to automatically detect gzip/zlib header (decompression/inflate only)
constexpr int window_bits = 15 + 16; // gzip with windowbits of 15

constexpr int mem_level = 8;
// The memory requirements for deflate are (in bytes):
// (1 << (window_bits+2)) + (1 << (mem_level+9))
// with a default value of 8 for mem_level and our window_bits of 15
// this is 128Kb

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
if (deflateInit2(&deflate_s, level_, Z_DEFLATED, window_bits, mem_level, Z_DEFAULT_STRATEGY) != Z_OK)
{
throw std::runtime_error("deflate init failed");
}
#pragma GCC diagnostic pop

deflate_s.next_in = reinterpret_cast<z_const Bytef*>(data);
deflate_s.avail_in = static_cast<unsigned int>(size);

std::size_t size_compressed = 0;
do
{
size_t increase = size / 2 + 1024;
if (output.size() < (size_compressed + increase))
{
output.resize(size_compressed + increase);
}
// There is no way we see that "increase" would not fit in an unsigned int,
// hence we use static cast here to avoid -Wshorten-64-to-32 error
deflate_s.avail_out = static_cast<unsigned int>(increase);
deflate_s.next_out = reinterpret_cast<Bytef*>((&output[0] + size_compressed));
// From http://www.zlib.net/zlib_how.html
// "deflate() has a return value that can indicate errors, yet we do not check it here.
// Why not? Well, it turns out that deflate() can do no wrong here."
// Basically only possible error is from deflateInit not working properly
deflate(&deflate_s, Z_FINISH);
size_compressed += (increase - deflate_s.avail_out);
} while (deflate_s.avail_out == 0);

deflateEnd(&deflate_s);
output.resize(size_compressed);
}
};

inline std::string compress(const char* data,
std::size_t size,
int level = Z_DEFAULT_COMPRESSION)
{
Compressor comp(level);
std::string output;
comp.compress(output, data, size);
return output;
}

} // namespace gzip

// //////////////////////////////////////////////////////////////////////
// End of content of file: arbiter/third/gzip/compress.hpp
// //////////////////////////////////////////////////////////////////////






// //////////////////////////////////////////////////////////////////////
// Beginning of content of file: arbiter/third/gzip/decompress.hpp
// //////////////////////////////////////////////////////////////////////

#ifndef ARBITER_IS_AMALGAMATION
#include "config.hpp"
#endif

// zlib
#include <zlib.h>

// std
#include <limits>
#include <stdexcept>
#include <string>

namespace gzip {

class Decompressor
{
std::size_t max_;

public:
Decompressor(std::size_t max_bytes = 1000000000) // by default refuse operation if compressed data is > 1GB
: max_(max_bytes)
{
}

template <typename OutputType>
void decompress(OutputType& output,
const char* data,
std::size_t size) const
{
z_stream inflate_s;

inflate_s.zalloc = Z_NULL;
inflate_s.zfree = Z_NULL;
inflate_s.opaque = Z_NULL;
inflate_s.avail_in = 0;
inflate_s.next_in = Z_NULL;

// The windowBits parameter is the base two logarithm of the window size (the size of the history buffer).
// It should be in the range 8..15 for this version of the library.
// Larger values of this parameter result in better compression at the expense of memory usage.
// This range of values also changes the decoding type:
// -8 to -15 for raw deflate
// 8 to 15 for zlib
// (8 to 15) + 16 for gzip
// (8 to 15) + 32 to automatically detect gzip/zlib header
constexpr int window_bits = 15 + 32; // auto with windowbits of 15

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
if (inflateInit2(&inflate_s, window_bits) != Z_OK)
{
throw std::runtime_error("inflate init failed");
}
#pragma GCC diagnostic pop
inflate_s.next_in = reinterpret_cast<z_const Bytef*>(data);

#ifdef DEBUG
// Verify if size (long type) input will fit into unsigned int, type used for zlib's avail_in
std::uint64_t size_64 = size * 2;
if (size_64 > std::numeric_limits<unsigned int>::max())
{
inflateEnd(&inflate_s);
throw std::runtime_error("size arg is too large to fit into unsigned int type x2");
}
#endif
if (size > max_ || (size * 2) > max_)
{
inflateEnd(&inflate_s);
throw std::runtime_error("size may use more memory than intended when decompressing");
}
inflate_s.avail_in = static_cast<unsigned int>(size);
std::size_t size_uncompressed = 0;
do
{
std::size_t resize_to = size_uncompressed + 2 * size;
if (resize_to > max_)
{
inflateEnd(&inflate_s);
throw std::runtime_error("size of output string will use more memory then intended when decompressing");
}
output.resize(resize_to);
inflate_s.avail_out = static_cast<unsigned int>(2 * size);
inflate_s.next_out = reinterpret_cast<Bytef*>(&output[0] + size_uncompressed);
int ret = inflate(&inflate_s, Z_FINISH);
if (ret != Z_STREAM_END && ret != Z_OK && ret != Z_BUF_ERROR)
{
std::string error_msg = inflate_s.msg;
inflateEnd(&inflate_s);
throw std::runtime_error(error_msg);
}

size_uncompressed += (2 * size - inflate_s.avail_out);
} while (inflate_s.avail_out == 0);
inflateEnd(&inflate_s);
output.resize(size_uncompressed);
}
};

inline std::string decompress(const char* data, std::size_t size)
{
Decompressor decomp;
std::string output;
decomp.decompress(output, data, size);
return output;
}

} // namespace gzip

// //////////////////////////////////////////////////////////////////////
// End of content of file: arbiter/third/gzip/decompress.hpp
// //////////////////////////////////////////////////////////////////////






// //////////////////////////////////////////////////////////////////////
// Beginning of content of file: arbiter/util/exports.hpp
// //////////////////////////////////////////////////////////////////////
Expand Down

0 comments on commit 5baec4c

Please sign in to comment.